{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "导包完毕\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import pickle\n",
    "print(\"导包完毕\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(13887, 29)\n",
      "(12955, 28)\n",
      "读取数据成功\n"
     ]
    }
   ],
   "source": [
    "with open('./df_train.pkl','rb') as file:\n",
    "    df_train=pickle.load(file)\n",
    "    \n",
    "with open('./df_test.pkl','rb') as file:\n",
    "    df_test=pickle.load(file)\n",
    "print(df_train.shape)\n",
    "print(df_test.shape)\n",
    "print(\"读取数据成功\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>file_id</th>\n",
       "      <th>label</th>\n",
       "      <th>api</th>\n",
       "      <th>tid</th>\n",
       "      <th>index</th>\n",
       "      <th>api_count</th>\n",
       "      <th>api_nunique</th>\n",
       "      <th>api_min</th>\n",
       "      <th>api_max</th>\n",
       "      <th>api_mean</th>\n",
       "      <th>...</th>\n",
       "      <th>tid_std</th>\n",
       "      <th>tid_ptp</th>\n",
       "      <th>index_count</th>\n",
       "      <th>index_nunique</th>\n",
       "      <th>index_min</th>\n",
       "      <th>index_max</th>\n",
       "      <th>index_mean</th>\n",
       "      <th>index_median</th>\n",
       "      <th>index_std</th>\n",
       "      <th>index_ptp</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>135</td>\n",
       "      <td>2488</td>\n",
       "      <td>0</td>\n",
       "      <td>6786</td>\n",
       "      <td>116</td>\n",
       "      <td>6</td>\n",
       "      <td>298</td>\n",
       "      <td>171.965223</td>\n",
       "      <td>...</td>\n",
       "      <td>83.881299</td>\n",
       "      <td>324</td>\n",
       "      <td>6786</td>\n",
       "      <td>5001</td>\n",
       "      <td>0</td>\n",
       "      <td>5000</td>\n",
       "      <td>2000.806955</td>\n",
       "      <td>1607.5</td>\n",
       "      <td>1510.694221</td>\n",
       "      <td>5000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6786</th>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>95</td>\n",
       "      <td>2320</td>\n",
       "      <td>0</td>\n",
       "      <td>816</td>\n",
       "      <td>30</td>\n",
       "      <td>89</td>\n",
       "      <td>298</td>\n",
       "      <td>159.696078</td>\n",
       "      <td>...</td>\n",
       "      <td>101.506783</td>\n",
       "      <td>284</td>\n",
       "      <td>816</td>\n",
       "      <td>204</td>\n",
       "      <td>0</td>\n",
       "      <td>203</td>\n",
       "      <td>101.500000</td>\n",
       "      <td>101.5</td>\n",
       "      <td>58.925137</td>\n",
       "      <td>203</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7602</th>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>151</td>\n",
       "      <td>2208</td>\n",
       "      <td>0</td>\n",
       "      <td>463</td>\n",
       "      <td>42</td>\n",
       "      <td>9</td>\n",
       "      <td>258</td>\n",
       "      <td>164.948164</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0</td>\n",
       "      <td>463</td>\n",
       "      <td>463</td>\n",
       "      <td>0</td>\n",
       "      <td>462</td>\n",
       "      <td>231.000000</td>\n",
       "      <td>231.0</td>\n",
       "      <td>133.800847</td>\n",
       "      <td>462</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8065</th>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>95</td>\n",
       "      <td>2284</td>\n",
       "      <td>0</td>\n",
       "      <td>2046</td>\n",
       "      <td>51</td>\n",
       "      <td>9</td>\n",
       "      <td>257</td>\n",
       "      <td>154.939883</td>\n",
       "      <td>...</td>\n",
       "      <td>150.460506</td>\n",
       "      <td>696</td>\n",
       "      <td>2046</td>\n",
       "      <td>1028</td>\n",
       "      <td>0</td>\n",
       "      <td>1027</td>\n",
       "      <td>511.012219</td>\n",
       "      <td>511.0</td>\n",
       "      <td>295.407885</td>\n",
       "      <td>1027</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10111</th>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>249</td>\n",
       "      <td>2500</td>\n",
       "      <td>0</td>\n",
       "      <td>10002</td>\n",
       "      <td>65</td>\n",
       "      <td>6</td>\n",
       "      <td>254</td>\n",
       "      <td>201.893421</td>\n",
       "      <td>...</td>\n",
       "      <td>49.556301</td>\n",
       "      <td>176</td>\n",
       "      <td>10002</td>\n",
       "      <td>5001</td>\n",
       "      <td>0</td>\n",
       "      <td>5000</td>\n",
       "      <td>2500.000000</td>\n",
       "      <td>2500.0</td>\n",
       "      <td>1443.736493</td>\n",
       "      <td>5000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>89620181</th>\n",
       "      <td>13883</td>\n",
       "      <td>2</td>\n",
       "      <td>95</td>\n",
       "      <td>100</td>\n",
       "      <td>0</td>\n",
       "      <td>178221</td>\n",
       "      <td>71</td>\n",
       "      <td>6</td>\n",
       "      <td>279</td>\n",
       "      <td>156.643100</td>\n",
       "      <td>...</td>\n",
       "      <td>1405.045515</td>\n",
       "      <td>6468</td>\n",
       "      <td>178221</td>\n",
       "      <td>5001</td>\n",
       "      <td>0</td>\n",
       "      <td>5000</td>\n",
       "      <td>401.480987</td>\n",
       "      <td>47.0</td>\n",
       "      <td>1008.636040</td>\n",
       "      <td>5000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>89798402</th>\n",
       "      <td>13884</td>\n",
       "      <td>5</td>\n",
       "      <td>95</td>\n",
       "      <td>2592</td>\n",
       "      <td>0</td>\n",
       "      <td>1319</td>\n",
       "      <td>39</td>\n",
       "      <td>6</td>\n",
       "      <td>279</td>\n",
       "      <td>163.025019</td>\n",
       "      <td>...</td>\n",
       "      <td>4.295386</td>\n",
       "      <td>156</td>\n",
       "      <td>1319</td>\n",
       "      <td>1319</td>\n",
       "      <td>0</td>\n",
       "      <td>1318</td>\n",
       "      <td>659.000000</td>\n",
       "      <td>659.0</td>\n",
       "      <td>380.906813</td>\n",
       "      <td>1318</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>89799721</th>\n",
       "      <td>13885</td>\n",
       "      <td>0</td>\n",
       "      <td>151</td>\n",
       "      <td>2240</td>\n",
       "      <td>0</td>\n",
       "      <td>1033</td>\n",
       "      <td>71</td>\n",
       "      <td>8</td>\n",
       "      <td>259</td>\n",
       "      <td>174.896418</td>\n",
       "      <td>...</td>\n",
       "      <td>33.152020</td>\n",
       "      <td>504</td>\n",
       "      <td>1033</td>\n",
       "      <td>1033</td>\n",
       "      <td>0</td>\n",
       "      <td>1032</td>\n",
       "      <td>516.000000</td>\n",
       "      <td>516.0</td>\n",
       "      <td>298.345717</td>\n",
       "      <td>1032</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>89800754</th>\n",
       "      <td>13886</td>\n",
       "      <td>1</td>\n",
       "      <td>95</td>\n",
       "      <td>2324</td>\n",
       "      <td>0</td>\n",
       "      <td>5316</td>\n",
       "      <td>80</td>\n",
       "      <td>9</td>\n",
       "      <td>281</td>\n",
       "      <td>168.313017</td>\n",
       "      <td>...</td>\n",
       "      <td>154.796790</td>\n",
       "      <td>512</td>\n",
       "      <td>5316</td>\n",
       "      <td>2503</td>\n",
       "      <td>0</td>\n",
       "      <td>2502</td>\n",
       "      <td>1173.050414</td>\n",
       "      <td>1165.5</td>\n",
       "      <td>755.545651</td>\n",
       "      <td>2502</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>89806070</th>\n",
       "      <td>13887</td>\n",
       "      <td>2</td>\n",
       "      <td>135</td>\n",
       "      <td>2336</td>\n",
       "      <td>0</td>\n",
       "      <td>623</td>\n",
       "      <td>37</td>\n",
       "      <td>11</td>\n",
       "      <td>277</td>\n",
       "      <td>139.784912</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0</td>\n",
       "      <td>623</td>\n",
       "      <td>623</td>\n",
       "      <td>0</td>\n",
       "      <td>622</td>\n",
       "      <td>311.000000</td>\n",
       "      <td>311.0</td>\n",
       "      <td>179.988889</td>\n",
       "      <td>622</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>13887 rows × 29 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          file_id  label  api   tid  index  api_count  api_nunique  api_min  \\\n",
       "0               1      5  135  2488      0       6786          116        6   \n",
       "6786            2      2   95  2320      0        816           30       89   \n",
       "7602            3      0  151  2208      0        463           42        9   \n",
       "8065            4      0   95  2284      0       2046           51        9   \n",
       "10111           5      0  249  2500      0      10002           65        6   \n",
       "...           ...    ...  ...   ...    ...        ...          ...      ...   \n",
       "89620181    13883      2   95   100      0     178221           71        6   \n",
       "89798402    13884      5   95  2592      0       1319           39        6   \n",
       "89799721    13885      0  151  2240      0       1033           71        8   \n",
       "89800754    13886      1   95  2324      0       5316           80        9   \n",
       "89806070    13887      2  135  2336      0        623           37       11   \n",
       "\n",
       "          api_max    api_mean  ...      tid_std  tid_ptp  index_count  \\\n",
       "0             298  171.965223  ...    83.881299      324         6786   \n",
       "6786          298  159.696078  ...   101.506783      284          816   \n",
       "7602          258  164.948164  ...     0.000000        0          463   \n",
       "8065          257  154.939883  ...   150.460506      696         2046   \n",
       "10111         254  201.893421  ...    49.556301      176        10002   \n",
       "...           ...         ...  ...          ...      ...          ...   \n",
       "89620181      279  156.643100  ...  1405.045515     6468       178221   \n",
       "89798402      279  163.025019  ...     4.295386      156         1319   \n",
       "89799721      259  174.896418  ...    33.152020      504         1033   \n",
       "89800754      281  168.313017  ...   154.796790      512         5316   \n",
       "89806070      277  139.784912  ...     0.000000        0          623   \n",
       "\n",
       "          index_nunique  index_min  index_max   index_mean  index_median  \\\n",
       "0                  5001          0       5000  2000.806955        1607.5   \n",
       "6786                204          0        203   101.500000         101.5   \n",
       "7602                463          0        462   231.000000         231.0   \n",
       "8065               1028          0       1027   511.012219         511.0   \n",
       "10111              5001          0       5000  2500.000000        2500.0   \n",
       "...                 ...        ...        ...          ...           ...   \n",
       "89620181           5001          0       5000   401.480987          47.0   \n",
       "89798402           1319          0       1318   659.000000         659.0   \n",
       "89799721           1033          0       1032   516.000000         516.0   \n",
       "89800754           2503          0       2502  1173.050414        1165.5   \n",
       "89806070            623          0        622   311.000000         311.0   \n",
       "\n",
       "            index_std  index_ptp  \n",
       "0         1510.694221       5000  \n",
       "6786        58.925137        203  \n",
       "7602       133.800847        462  \n",
       "8065       295.407885       1027  \n",
       "10111     1443.736493       5000  \n",
       "...               ...        ...  \n",
       "89620181  1008.636040       5000  \n",
       "89798402   380.906813       1318  \n",
       "89799721   298.345717       1032  \n",
       "89800754   755.545651       2502  \n",
       "89806070   179.988889        622  \n",
       "\n",
       "[13887 rows x 29 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the training table (pandas elides the middle rows and columns).\n",
    "# Judging by the saved output there is one row per file_id -- TODO confirm.\n",
    "df_train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['file_id', 'label', 'api', 'tid', 'index', 'api_count', 'api_nunique',\n",
       "       'api_min', 'api_max', 'api_mean', 'api_median', 'api_std', 'api_ptp',\n",
       "       'tid_count', 'tid_nunique', 'tid_min', 'tid_max', 'tid_mean',\n",
       "       'tid_median', 'tid_std', 'tid_ptp', 'index_count', 'index_nunique',\n",
       "       'index_min', 'index_max', 'index_mean', 'index_median', 'index_std',\n",
       "       'index_ptp'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# List every training column label, since the frame preview elides the middle ones.\n",
    "df_train.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "29 28\n"
     ]
    }
   ],
   "source": [
    "print(len(df_train.columns),len(df_test.columns))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['file_id', 'api', 'tid', 'index', 'api_count', 'api_nunique', 'api_min',\n",
       "       'api_max', 'api_mean', 'api_median', 'api_std', 'api_ptp', 'tid_count',\n",
       "       'tid_nunique', 'tid_min', 'tid_max', 'tid_mean', 'tid_median',\n",
       "       'tid_std', 'tid_ptp', 'index_count', 'index_nunique', 'index_min',\n",
       "       'index_max', 'index_mean', 'index_median', 'index_std', 'index_ptp'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# List every test column label, for comparison with the training columns.\n",
    "df_test.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>file_id</th>\n",
       "      <th>api</th>\n",
       "      <th>tid</th>\n",
       "      <th>index</th>\n",
       "      <th>api_count</th>\n",
       "      <th>api_nunique</th>\n",
       "      <th>api_min</th>\n",
       "      <th>api_max</th>\n",
       "      <th>api_mean</th>\n",
       "      <th>api_median</th>\n",
       "      <th>...</th>\n",
       "      <th>tid_std</th>\n",
       "      <th>tid_ptp</th>\n",
       "      <th>index_count</th>\n",
       "      <th>index_nunique</th>\n",
       "      <th>index_min</th>\n",
       "      <th>index_max</th>\n",
       "      <th>index_mean</th>\n",
       "      <th>index_median</th>\n",
       "      <th>index_std</th>\n",
       "      <th>index_ptp</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>226</td>\n",
       "      <td>2332</td>\n",
       "      <td>0</td>\n",
       "      <td>97</td>\n",
       "      <td>15</td>\n",
       "      <td>13</td>\n",
       "      <td>262</td>\n",
       "      <td>155.989691</td>\n",
       "      <td>152.0</td>\n",
       "      <td>...</td>\n",
       "      <td>57.218548</td>\n",
       "      <td>236</td>\n",
       "      <td>97</td>\n",
       "      <td>31</td>\n",
       "      <td>0</td>\n",
       "      <td>30</td>\n",
       "      <td>14.443299</td>\n",
       "      <td>14.0</td>\n",
       "      <td>9.210466</td>\n",
       "      <td>30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>97</th>\n",
       "      <td>2</td>\n",
       "      <td>226</td>\n",
       "      <td>2472</td>\n",
       "      <td>0</td>\n",
       "      <td>1361</td>\n",
       "      <td>40</td>\n",
       "      <td>6</td>\n",
       "      <td>261</td>\n",
       "      <td>138.025716</td>\n",
       "      <td>138.0</td>\n",
       "      <td>...</td>\n",
       "      <td>104.399149</td>\n",
       "      <td>276</td>\n",
       "      <td>1361</td>\n",
       "      <td>681</td>\n",
       "      <td>0</td>\n",
       "      <td>680</td>\n",
       "      <td>339.750184</td>\n",
       "      <td>340.0</td>\n",
       "      <td>196.515744</td>\n",
       "      <td>680</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1458</th>\n",
       "      <td>3</td>\n",
       "      <td>95</td>\n",
       "      <td>2344</td>\n",
       "      <td>0</td>\n",
       "      <td>16</td>\n",
       "      <td>9</td>\n",
       "      <td>16</td>\n",
       "      <td>257</td>\n",
       "      <td>111.375000</td>\n",
       "      <td>134.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0</td>\n",
       "      <td>16</td>\n",
       "      <td>16</td>\n",
       "      <td>0</td>\n",
       "      <td>15</td>\n",
       "      <td>7.500000</td>\n",
       "      <td>7.5</td>\n",
       "      <td>4.760952</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1474</th>\n",
       "      <td>4</td>\n",
       "      <td>135</td>\n",
       "      <td>2452</td>\n",
       "      <td>0</td>\n",
       "      <td>193</td>\n",
       "      <td>34</td>\n",
       "      <td>13</td>\n",
       "      <td>262</td>\n",
       "      <td>172.217617</td>\n",
       "      <td>170.0</td>\n",
       "      <td>...</td>\n",
       "      <td>50.951508</td>\n",
       "      <td>132</td>\n",
       "      <td>193</td>\n",
       "      <td>193</td>\n",
       "      <td>0</td>\n",
       "      <td>192</td>\n",
       "      <td>96.000000</td>\n",
       "      <td>96.0</td>\n",
       "      <td>55.858452</td>\n",
       "      <td>192</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1667</th>\n",
       "      <td>5</td>\n",
       "      <td>95</td>\n",
       "      <td>2332</td>\n",
       "      <td>0</td>\n",
       "      <td>803</td>\n",
       "      <td>34</td>\n",
       "      <td>16</td>\n",
       "      <td>261</td>\n",
       "      <td>168.490660</td>\n",
       "      <td>153.0</td>\n",
       "      <td>...</td>\n",
       "      <td>201.826813</td>\n",
       "      <td>448</td>\n",
       "      <td>803</td>\n",
       "      <td>268</td>\n",
       "      <td>0</td>\n",
       "      <td>267</td>\n",
       "      <td>133.333748</td>\n",
       "      <td>133.0</td>\n",
       "      <td>77.317048</td>\n",
       "      <td>267</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>79277890</th>\n",
       "      <td>12951</td>\n",
       "      <td>151</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>289</td>\n",
       "      <td>37</td>\n",
       "      <td>9</td>\n",
       "      <td>269</td>\n",
       "      <td>140.536332</td>\n",
       "      <td>151.0</td>\n",
       "      <td>...</td>\n",
       "      <td>75.402526</td>\n",
       "      <td>336</td>\n",
       "      <td>289</td>\n",
       "      <td>145</td>\n",
       "      <td>0</td>\n",
       "      <td>144</td>\n",
       "      <td>71.750865</td>\n",
       "      <td>72.0</td>\n",
       "      <td>41.786414</td>\n",
       "      <td>144</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>79278179</th>\n",
       "      <td>12952</td>\n",
       "      <td>151</td>\n",
       "      <td>2264</td>\n",
       "      <td>0</td>\n",
       "      <td>112</td>\n",
       "      <td>28</td>\n",
       "      <td>56</td>\n",
       "      <td>261</td>\n",
       "      <td>163.669643</td>\n",
       "      <td>152.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0</td>\n",
       "      <td>112</td>\n",
       "      <td>112</td>\n",
       "      <td>0</td>\n",
       "      <td>111</td>\n",
       "      <td>55.500000</td>\n",
       "      <td>55.5</td>\n",
       "      <td>32.475632</td>\n",
       "      <td>111</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>79278291</th>\n",
       "      <td>12953</td>\n",
       "      <td>135</td>\n",
       "      <td>2324</td>\n",
       "      <td>0</td>\n",
       "      <td>5095</td>\n",
       "      <td>72</td>\n",
       "      <td>6</td>\n",
       "      <td>286</td>\n",
       "      <td>200.063199</td>\n",
       "      <td>214.0</td>\n",
       "      <td>...</td>\n",
       "      <td>196.695730</td>\n",
       "      <td>560</td>\n",
       "      <td>5095</td>\n",
       "      <td>1464</td>\n",
       "      <td>0</td>\n",
       "      <td>1463</td>\n",
       "      <td>538.423749</td>\n",
       "      <td>454.0</td>\n",
       "      <td>393.605016</td>\n",
       "      <td>1463</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>79283386</th>\n",
       "      <td>12954</td>\n",
       "      <td>135</td>\n",
       "      <td>2424</td>\n",
       "      <td>0</td>\n",
       "      <td>2951</td>\n",
       "      <td>65</td>\n",
       "      <td>9</td>\n",
       "      <td>298</td>\n",
       "      <td>191.007794</td>\n",
       "      <td>139.0</td>\n",
       "      <td>...</td>\n",
       "      <td>126.124152</td>\n",
       "      <td>276</td>\n",
       "      <td>2951</td>\n",
       "      <td>1445</td>\n",
       "      <td>0</td>\n",
       "      <td>1444</td>\n",
       "      <td>596.701796</td>\n",
       "      <td>555.0</td>\n",
       "      <td>397.358069</td>\n",
       "      <td>1444</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>79286337</th>\n",
       "      <td>12955</td>\n",
       "      <td>135</td>\n",
       "      <td>2500</td>\n",
       "      <td>0</td>\n",
       "      <td>2038</td>\n",
       "      <td>54</td>\n",
       "      <td>13</td>\n",
       "      <td>284</td>\n",
       "      <td>208.845927</td>\n",
       "      <td>266.0</td>\n",
       "      <td>...</td>\n",
       "      <td>78.912837</td>\n",
       "      <td>240</td>\n",
       "      <td>2038</td>\n",
       "      <td>1451</td>\n",
       "      <td>0</td>\n",
       "      <td>1450</td>\n",
       "      <td>560.742885</td>\n",
       "      <td>431.5</td>\n",
       "      <td>440.983364</td>\n",
       "      <td>1450</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>12955 rows × 28 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          file_id  api   tid  index  api_count  api_nunique  api_min  api_max  \\\n",
       "0               1  226  2332      0         97           15       13      262   \n",
       "97              2  226  2472      0       1361           40        6      261   \n",
       "1458            3   95  2344      0         16            9       16      257   \n",
       "1474            4  135  2452      0        193           34       13      262   \n",
       "1667            5   95  2332      0        803           34       16      261   \n",
       "...           ...  ...   ...    ...        ...          ...      ...      ...   \n",
       "79277890    12951  151  2644      0        289           37        9      269   \n",
       "79278179    12952  151  2264      0        112           28       56      261   \n",
       "79278291    12953  135  2324      0       5095           72        6      286   \n",
       "79283386    12954  135  2424      0       2951           65        9      298   \n",
       "79286337    12955  135  2500      0       2038           54       13      284   \n",
       "\n",
       "            api_mean  api_median  ...     tid_std  tid_ptp  index_count  \\\n",
       "0         155.989691       152.0  ...   57.218548      236           97   \n",
       "97        138.025716       138.0  ...  104.399149      276         1361   \n",
       "1458      111.375000       134.0  ...    0.000000        0           16   \n",
       "1474      172.217617       170.0  ...   50.951508      132          193   \n",
       "1667      168.490660       153.0  ...  201.826813      448          803   \n",
       "...              ...         ...  ...         ...      ...          ...   \n",
       "79277890  140.536332       151.0  ...   75.402526      336          289   \n",
       "79278179  163.669643       152.0  ...    0.000000        0          112   \n",
       "79278291  200.063199       214.0  ...  196.695730      560         5095   \n",
       "79283386  191.007794       139.0  ...  126.124152      276         2951   \n",
       "79286337  208.845927       266.0  ...   78.912837      240         2038   \n",
       "\n",
       "          index_nunique  index_min  index_max  index_mean  index_median  \\\n",
       "0                    31          0         30   14.443299          14.0   \n",
       "97                  681          0        680  339.750184         340.0   \n",
       "1458                 16          0         15    7.500000           7.5   \n",
       "1474                193          0        192   96.000000          96.0   \n",
       "1667                268          0        267  133.333748         133.0   \n",
       "...                 ...        ...        ...         ...           ...   \n",
       "79277890            145          0        144   71.750865          72.0   \n",
       "79278179            112          0        111   55.500000          55.5   \n",
       "79278291           1464          0       1463  538.423749         454.0   \n",
       "79283386           1445          0       1444  596.701796         555.0   \n",
       "79286337           1451          0       1450  560.742885         431.5   \n",
       "\n",
       "           index_std  index_ptp  \n",
       "0           9.210466         30  \n",
       "97        196.515744        680  \n",
       "1458        4.760952         15  \n",
       "1474       55.858452        192  \n",
       "1667       77.317048        267  \n",
       "...              ...        ...  \n",
       "79277890   41.786414        144  \n",
       "79278179   32.475632        111  \n",
       "79278291  393.605016       1463  \n",
       "79283386  397.358069       1444  \n",
       "79286337  440.983364       1450  \n",
       "\n",
       "[12955 rows x 28 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the test table (pandas elides the middle rows and columns).\n",
    "df_test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(26842, 7614) 读取数据成功\n"
     ]
    }
   ],
   "source": [
    "#读取一下api\n",
    "with open('./df_apis.pkl','rb') as file:\n",
    "    df_apis=pickle.load(file)\n",
    "print(df_apis.shape,\"读取数据成功\")#这个就是那个apis那生成的TFIDF矩阵 列名columns就是TFIDF的featrue_name 内容是对应的矩阵"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>__exception__</th>\n",
       "      <th>__exception__ __exception__</th>\n",
       "      <th>__exception__ __exception__ __exception__</th>\n",
       "      <th>__exception__ __exception__ ldrloaddll</th>\n",
       "      <th>__exception__ __exception__ ntallocatevirtualmemory</th>\n",
       "      <th>__exception__ __exception__ ntclose</th>\n",
       "      <th>__exception__ __exception__ ntdelayexecution</th>\n",
       "      <th>__exception__ couninitialize</th>\n",
       "      <th>__exception__ couninitialize ldrunloaddll</th>\n",
       "      <th>__exception__ getsysteminfo</th>\n",
       "      <th>...</th>\n",
       "      <th>wsastartup ntallocatevirtualmemory ntfreevirtualmemory</th>\n",
       "      <th>wsastartup regopenkeyexa</th>\n",
       "      <th>wsastartup regopenkeyexa regqueryvalueexa</th>\n",
       "      <th>wsastartup regopenkeyexa regsetvalueexa</th>\n",
       "      <th>wsastartup socket</th>\n",
       "      <th>wsastartup socket bind</th>\n",
       "      <th>wsastartup socket ntqueryattributesfile</th>\n",
       "      <th>wsastartup wsastartup</th>\n",
       "      <th>wsastartup wsastartup findresourceexw</th>\n",
       "      <th>wsastartup wsastartup socket</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>13887</th>\n",
       "      <td>0.000291</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000721</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00192</td>\n",
       "      <td>0.003114</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.002741</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000774</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13888</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.02643</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.028152</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13889</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13890</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13891</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26837</th>\n",
       "      <td>0.000343</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26838</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26839</th>\n",
       "      <td>0.000351</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26840</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26841</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>12955 rows × 7614 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       __exception__  __exception__ __exception__  \\\n",
       "13887       0.000291                          0.0   \n",
       "13888       0.000000                          0.0   \n",
       "13889       0.000000                          0.0   \n",
       "13890       0.000000                          0.0   \n",
       "13891       0.000000                          0.0   \n",
       "...              ...                          ...   \n",
       "26837       0.000343                          0.0   \n",
       "26838       0.000000                          0.0   \n",
       "26839       0.000351                          0.0   \n",
       "26840       0.000000                          0.0   \n",
       "26841       0.000000                          0.0   \n",
       "\n",
       "       __exception__ __exception__ __exception__  \\\n",
       "13887                                        0.0   \n",
       "13888                                        0.0   \n",
       "13889                                        0.0   \n",
       "13890                                        0.0   \n",
       "13891                                        0.0   \n",
       "...                                          ...   \n",
       "26837                                        0.0   \n",
       "26838                                        0.0   \n",
       "26839                                        0.0   \n",
       "26840                                        0.0   \n",
       "26841                                        0.0   \n",
       "\n",
       "       __exception__ __exception__ ldrloaddll  \\\n",
       "13887                                     0.0   \n",
       "13888                                     0.0   \n",
       "13889                                     0.0   \n",
       "13890                                     0.0   \n",
       "13891                                     0.0   \n",
       "...                                       ...   \n",
       "26837                                     0.0   \n",
       "26838                                     0.0   \n",
       "26839                                     0.0   \n",
       "26840                                     0.0   \n",
       "26841                                     0.0   \n",
       "\n",
       "       __exception__ __exception__ ntallocatevirtualmemory  \\\n",
       "13887                                                0.0     \n",
       "13888                                                0.0     \n",
       "13889                                                0.0     \n",
       "13890                                                0.0     \n",
       "13891                                                0.0     \n",
       "...                                                  ...     \n",
       "26837                                                0.0     \n",
       "26838                                                0.0     \n",
       "26839                                                0.0     \n",
       "26840                                                0.0     \n",
       "26841                                                0.0     \n",
       "\n",
       "       __exception__ __exception__ ntclose  \\\n",
       "13887                                  0.0   \n",
       "13888                                  0.0   \n",
       "13889                                  0.0   \n",
       "13890                                  0.0   \n",
       "13891                                  0.0   \n",
       "...                                    ...   \n",
       "26837                                  0.0   \n",
       "26838                                  0.0   \n",
       "26839                                  0.0   \n",
       "26840                                  0.0   \n",
       "26841                                  0.0   \n",
       "\n",
       "       __exception__ __exception__ ntdelayexecution  \\\n",
       "13887                                           0.0   \n",
       "13888                                           0.0   \n",
       "13889                                           0.0   \n",
       "13890                                           0.0   \n",
       "13891                                           0.0   \n",
       "...                                             ...   \n",
       "26837                                           0.0   \n",
       "26838                                           0.0   \n",
       "26839                                           0.0   \n",
       "26840                                           0.0   \n",
       "26841                                           0.0   \n",
       "\n",
       "       __exception__ couninitialize  \\\n",
       "13887                           0.0   \n",
       "13888                           0.0   \n",
       "13889                           0.0   \n",
       "13890                           0.0   \n",
       "13891                           0.0   \n",
       "...                             ...   \n",
       "26837                           0.0   \n",
       "26838                           0.0   \n",
       "26839                           0.0   \n",
       "26840                           0.0   \n",
       "26841                           0.0   \n",
       "\n",
       "       __exception__ couninitialize ldrunloaddll  __exception__ getsysteminfo  \\\n",
       "13887                                        0.0                          0.0   \n",
       "13888                                        0.0                          0.0   \n",
       "13889                                        0.0                          0.0   \n",
       "13890                                        0.0                          0.0   \n",
       "13891                                        0.0                          0.0   \n",
       "...                                          ...                          ...   \n",
       "26837                                        0.0                          0.0   \n",
       "26838                                        0.0                          0.0   \n",
       "26839                                        0.0                          0.0   \n",
       "26840                                        0.0                          0.0   \n",
       "26841                                        0.0                          0.0   \n",
       "\n",
       "       ...  wsastartup ntallocatevirtualmemory ntfreevirtualmemory  \\\n",
       "13887  ...                                           0.000721        \n",
       "13888  ...                                           0.000000        \n",
       "13889  ...                                           0.000000        \n",
       "13890  ...                                           0.000000        \n",
       "13891  ...                                           0.000000        \n",
       "...    ...                                                ...        \n",
       "26837  ...                                           0.000000        \n",
       "26838  ...                                           0.000000        \n",
       "26839  ...                                           0.000000        \n",
       "26840  ...                                           0.000000        \n",
       "26841  ...                                           0.000000        \n",
       "\n",
       "       wsastartup regopenkeyexa  wsastartup regopenkeyexa regqueryvalueexa  \\\n",
       "13887                       0.0                                        0.0   \n",
       "13888                       0.0                                        0.0   \n",
       "13889                       0.0                                        0.0   \n",
       "13890                       0.0                                        0.0   \n",
       "13891                       0.0                                        0.0   \n",
       "...                         ...                                        ...   \n",
       "26837                       0.0                                        0.0   \n",
       "26838                       0.0                                        0.0   \n",
       "26839                       0.0                                        0.0   \n",
       "26840                       0.0                                        0.0   \n",
       "26841                       0.0                                        0.0   \n",
       "\n",
       "       wsastartup regopenkeyexa regsetvalueexa  wsastartup socket  \\\n",
       "13887                                      0.0            0.00192   \n",
       "13888                                      0.0            0.02643   \n",
       "13889                                      0.0            0.00000   \n",
       "13890                                      0.0            0.00000   \n",
       "13891                                      0.0            0.00000   \n",
       "...                                        ...                ...   \n",
       "26837                                      0.0            0.00000   \n",
       "26838                                      0.0            0.00000   \n",
       "26839                                      0.0            0.00000   \n",
       "26840                                      0.0            0.00000   \n",
       "26841                                      0.0            0.00000   \n",
       "\n",
       "       wsastartup socket bind  wsastartup socket ntqueryattributesfile  \\\n",
       "13887                0.003114                                 0.000000   \n",
       "13888                0.000000                                 0.028152   \n",
       "13889                0.000000                                 0.000000   \n",
       "13890                0.000000                                 0.000000   \n",
       "13891                0.000000                                 0.000000   \n",
       "...                       ...                                      ...   \n",
       "26837                0.000000                                 0.000000   \n",
       "26838                0.000000                                 0.000000   \n",
       "26839                0.000000                                 0.000000   \n",
       "26840                0.000000                                 0.000000   \n",
       "26841                0.000000                                 0.000000   \n",
       "\n",
       "       wsastartup wsastartup  wsastartup wsastartup findresourceexw  \\\n",
       "13887               0.002741                                    0.0   \n",
       "13888               0.000000                                    0.0   \n",
       "13889               0.000000                                    0.0   \n",
       "13890               0.000000                                    0.0   \n",
       "13891               0.000000                                    0.0   \n",
       "...                      ...                                    ...   \n",
       "26837               0.000000                                    0.0   \n",
       "26838               0.000000                                    0.0   \n",
       "26839               0.000000                                    0.0   \n",
       "26840               0.000000                                    0.0   \n",
       "26841               0.000000                                    0.0   \n",
       "\n",
       "       wsastartup wsastartup socket  \n",
       "13887                      0.000774  \n",
       "13888                      0.000000  \n",
       "13889                      0.000000  \n",
       "13890                      0.000000  \n",
       "13891                      0.000000  \n",
       "...                             ...  \n",
       "26837                      0.000000  \n",
       "26838                      0.000000  \n",
       "26839                      0.000000  \n",
       "26840                      0.000000  \n",
       "26841                      0.000000  \n",
       "\n",
       "[12955 rows x 7614 columns]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# df_apis was built by concatenating the train rows directly followed by the test\n",
    "# rows, so it is split back apart by index label. The boundary is derived from\n",
    "# len(df_train) instead of a hard-coded row number, so the split stays correct if\n",
    "# the size of the training set changes.\n",
    "is_train_row = df_apis.index < len(df_train)\n",
    "df_train_apis = df_apis[is_train_row]\n",
    "df_test_apis = df_apis[~is_train_row]\n",
    "\n",
    "# Fail loudly if the split is misaligned (e.g. df_apis does not carry a 0..N-1 index).\n",
    "assert len(df_train_apis) == len(df_train), \"train TF-IDF rows do not match df_train\"\n",
    "assert len(df_test_apis) == len(df_test), \"test TF-IDF rows do not match df_test\"\n",
    "\n",
    "# Show the test-side TF-IDF matrix as the cell output.\n",
    "df_test_apis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>__exception__</th>\n",
       "      <th>__exception__ __exception__</th>\n",
       "      <th>__exception__ __exception__ __exception__</th>\n",
       "      <th>__exception__ __exception__ ldrloaddll</th>\n",
       "      <th>__exception__ __exception__ ntallocatevirtualmemory</th>\n",
       "      <th>__exception__ __exception__ ntclose</th>\n",
       "      <th>__exception__ __exception__ ntdelayexecution</th>\n",
       "      <th>__exception__ couninitialize</th>\n",
       "      <th>__exception__ couninitialize ldrunloaddll</th>\n",
       "      <th>__exception__ getsysteminfo</th>\n",
       "      <th>...</th>\n",
       "      <th>wsastartup ntallocatevirtualmemory ntfreevirtualmemory</th>\n",
       "      <th>wsastartup regopenkeyexa</th>\n",
       "      <th>wsastartup regopenkeyexa regqueryvalueexa</th>\n",
       "      <th>wsastartup regopenkeyexa regsetvalueexa</th>\n",
       "      <th>wsastartup socket</th>\n",
       "      <th>wsastartup socket bind</th>\n",
       "      <th>wsastartup socket ntqueryattributesfile</th>\n",
       "      <th>wsastartup wsastartup</th>\n",
       "      <th>wsastartup wsastartup findresourceexw</th>\n",
       "      <th>wsastartup wsastartup socket</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.000291</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000721</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00192</td>\n",
       "      <td>0.003114</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.002741</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000774</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.02643</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.028152</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13882</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13883</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13884</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13885</th>\n",
       "      <td>0.001301</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13886</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>13887 rows × 7614 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       __exception__  __exception__ __exception__  \\\n",
       "0           0.000291                          0.0   \n",
       "1           0.000000                          0.0   \n",
       "2           0.000000                          0.0   \n",
       "3           0.000000                          0.0   \n",
       "4           0.000000                          0.0   \n",
       "...              ...                          ...   \n",
       "13882       0.000000                          0.0   \n",
       "13883       0.000000                          0.0   \n",
       "13884       0.000000                          0.0   \n",
       "13885       0.001301                          0.0   \n",
       "13886       0.000000                          0.0   \n",
       "\n",
       "       __exception__ __exception__ __exception__  \\\n",
       "0                                            0.0   \n",
       "1                                            0.0   \n",
       "2                                            0.0   \n",
       "3                                            0.0   \n",
       "4                                            0.0   \n",
       "...                                          ...   \n",
       "13882                                        0.0   \n",
       "13883                                        0.0   \n",
       "13884                                        0.0   \n",
       "13885                                        0.0   \n",
       "13886                                        0.0   \n",
       "\n",
       "       __exception__ __exception__ ldrloaddll  \\\n",
       "0                                         0.0   \n",
       "1                                         0.0   \n",
       "2                                         0.0   \n",
       "3                                         0.0   \n",
       "4                                         0.0   \n",
       "...                                       ...   \n",
       "13882                                     0.0   \n",
       "13883                                     0.0   \n",
       "13884                                     0.0   \n",
       "13885                                     0.0   \n",
       "13886                                     0.0   \n",
       "\n",
       "       __exception__ __exception__ ntallocatevirtualmemory  \\\n",
       "0                                                    0.0     \n",
       "1                                                    0.0     \n",
       "2                                                    0.0     \n",
       "3                                                    0.0     \n",
       "4                                                    0.0     \n",
       "...                                                  ...     \n",
       "13882                                                0.0     \n",
       "13883                                                0.0     \n",
       "13884                                                0.0     \n",
       "13885                                                0.0     \n",
       "13886                                                0.0     \n",
       "\n",
       "       __exception__ __exception__ ntclose  \\\n",
       "0                                      0.0   \n",
       "1                                      0.0   \n",
       "2                                      0.0   \n",
       "3                                      0.0   \n",
       "4                                      0.0   \n",
       "...                                    ...   \n",
       "13882                                  0.0   \n",
       "13883                                  0.0   \n",
       "13884                                  0.0   \n",
       "13885                                  0.0   \n",
       "13886                                  0.0   \n",
       "\n",
       "       __exception__ __exception__ ntdelayexecution  \\\n",
       "0                                               0.0   \n",
       "1                                               0.0   \n",
       "2                                               0.0   \n",
       "3                                               0.0   \n",
       "4                                               0.0   \n",
       "...                                             ...   \n",
       "13882                                           0.0   \n",
       "13883                                           0.0   \n",
       "13884                                           0.0   \n",
       "13885                                           0.0   \n",
       "13886                                           0.0   \n",
       "\n",
       "       __exception__ couninitialize  \\\n",
       "0                               0.0   \n",
       "1                               0.0   \n",
       "2                               0.0   \n",
       "3                               0.0   \n",
       "4                               0.0   \n",
       "...                             ...   \n",
       "13882                           0.0   \n",
       "13883                           0.0   \n",
       "13884                           0.0   \n",
       "13885                           0.0   \n",
       "13886                           0.0   \n",
       "\n",
       "       __exception__ couninitialize ldrunloaddll  __exception__ getsysteminfo  \\\n",
       "0                                            0.0                          0.0   \n",
       "1                                            0.0                          0.0   \n",
       "2                                            0.0                          0.0   \n",
       "3                                            0.0                          0.0   \n",
       "4                                            0.0                          0.0   \n",
       "...                                          ...                          ...   \n",
       "13882                                        0.0                          0.0   \n",
       "13883                                        0.0                          0.0   \n",
       "13884                                        0.0                          0.0   \n",
       "13885                                        0.0                          0.0   \n",
       "13886                                        0.0                          0.0   \n",
       "\n",
       "       ...  wsastartup ntallocatevirtualmemory ntfreevirtualmemory  \\\n",
       "0      ...                                           0.000721        \n",
       "1      ...                                           0.000000        \n",
       "2      ...                                           0.000000        \n",
       "3      ...                                           0.000000        \n",
       "4      ...                                           0.000000        \n",
       "...    ...                                                ...        \n",
       "13882  ...                                           0.000000        \n",
       "13883  ...                                           0.000000        \n",
       "13884  ...                                           0.000000        \n",
       "13885  ...                                           0.000000        \n",
       "13886  ...                                           0.000000        \n",
       "\n",
       "       wsastartup regopenkeyexa  wsastartup regopenkeyexa regqueryvalueexa  \\\n",
       "0                           0.0                                        0.0   \n",
       "1                           0.0                                        0.0   \n",
       "2                           0.0                                        0.0   \n",
       "3                           0.0                                        0.0   \n",
       "4                           0.0                                        0.0   \n",
       "...                         ...                                        ...   \n",
       "13882                       0.0                                        0.0   \n",
       "13883                       0.0                                        0.0   \n",
       "13884                       0.0                                        0.0   \n",
       "13885                       0.0                                        0.0   \n",
       "13886                       0.0                                        0.0   \n",
       "\n",
       "       wsastartup regopenkeyexa regsetvalueexa  wsastartup socket  \\\n",
       "0                                          0.0            0.00192   \n",
       "1                                          0.0            0.02643   \n",
       "2                                          0.0            0.00000   \n",
       "3                                          0.0            0.00000   \n",
       "4                                          0.0            0.00000   \n",
       "...                                        ...                ...   \n",
       "13882                                      0.0            0.00000   \n",
       "13883                                      0.0            0.00000   \n",
       "13884                                      0.0            0.00000   \n",
       "13885                                      0.0            0.00000   \n",
       "13886                                      0.0            0.00000   \n",
       "\n",
       "       wsastartup socket bind  wsastartup socket ntqueryattributesfile  \\\n",
       "0                    0.003114                                 0.000000   \n",
       "1                    0.000000                                 0.028152   \n",
       "2                    0.000000                                 0.000000   \n",
       "3                    0.000000                                 0.000000   \n",
       "4                    0.000000                                 0.000000   \n",
       "...                       ...                                      ...   \n",
       "13882                0.000000                                 0.000000   \n",
       "13883                0.000000                                 0.000000   \n",
       "13884                0.000000                                 0.000000   \n",
       "13885                0.000000                                 0.000000   \n",
       "13886                0.000000                                 0.000000   \n",
       "\n",
       "       wsastartup wsastartup  wsastartup wsastartup findresourceexw  \\\n",
       "0                   0.002741                                    0.0   \n",
       "1                   0.000000                                    0.0   \n",
       "2                   0.000000                                    0.0   \n",
       "3                   0.000000                                    0.0   \n",
       "4                   0.000000                                    0.0   \n",
       "...                      ...                                    ...   \n",
       "13882               0.000000                                    0.0   \n",
       "13883               0.000000                                    0.0   \n",
       "13884               0.000000                                    0.0   \n",
       "13885               0.000000                                    0.0   \n",
       "13886               0.000000                                    0.0   \n",
       "\n",
       "       wsastartup wsastartup socket  \n",
       "0                          0.000774  \n",
       "1                          0.000000  \n",
       "2                          0.000000  \n",
       "3                          0.000000  \n",
       "4                          0.000000  \n",
       "...                             ...  \n",
       "13882                      0.000000  \n",
       "13883                      0.000000  \n",
       "13884                      0.000000  \n",
       "13885                      0.000000  \n",
       "13886                      0.000000  \n",
       "\n",
       "[13887 rows x 7614 columns]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_train_apis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>__exception__</th>\n",
       "      <th>__exception__ __exception__</th>\n",
       "      <th>__exception__ __exception__ __exception__</th>\n",
       "      <th>__exception__ __exception__ ldrloaddll</th>\n",
       "      <th>__exception__ __exception__ ntallocatevirtualmemory</th>\n",
       "      <th>__exception__ __exception__ ntclose</th>\n",
       "      <th>__exception__ __exception__ ntdelayexecution</th>\n",
       "      <th>__exception__ couninitialize</th>\n",
       "      <th>__exception__ couninitialize ldrunloaddll</th>\n",
       "      <th>__exception__ getsysteminfo</th>\n",
       "      <th>...</th>\n",
       "      <th>wsastartup ntallocatevirtualmemory ntfreevirtualmemory</th>\n",
       "      <th>wsastartup regopenkeyexa</th>\n",
       "      <th>wsastartup regopenkeyexa regqueryvalueexa</th>\n",
       "      <th>wsastartup regopenkeyexa regsetvalueexa</th>\n",
       "      <th>wsastartup socket</th>\n",
       "      <th>wsastartup socket bind</th>\n",
       "      <th>wsastartup socket ntqueryattributesfile</th>\n",
       "      <th>wsastartup wsastartup</th>\n",
       "      <th>wsastartup wsastartup findresourceexw</th>\n",
       "      <th>wsastartup wsastartup socket</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.000291</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000721</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00192</td>\n",
       "      <td>0.003114</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.002741</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000774</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.02643</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.028152</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13882</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13883</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13884</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13885</th>\n",
       "      <td>0.001301</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13886</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>13887 rows × 7614 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       __exception__  __exception__ __exception__  \\\n",
       "0           0.000291                          0.0   \n",
       "1           0.000000                          0.0   \n",
       "2           0.000000                          0.0   \n",
       "3           0.000000                          0.0   \n",
       "4           0.000000                          0.0   \n",
       "...              ...                          ...   \n",
       "13882       0.000000                          0.0   \n",
       "13883       0.000000                          0.0   \n",
       "13884       0.000000                          0.0   \n",
       "13885       0.001301                          0.0   \n",
       "13886       0.000000                          0.0   \n",
       "\n",
       "       __exception__ __exception__ __exception__  \\\n",
       "0                                            0.0   \n",
       "1                                            0.0   \n",
       "2                                            0.0   \n",
       "3                                            0.0   \n",
       "4                                            0.0   \n",
       "...                                          ...   \n",
       "13882                                        0.0   \n",
       "13883                                        0.0   \n",
       "13884                                        0.0   \n",
       "13885                                        0.0   \n",
       "13886                                        0.0   \n",
       "\n",
       "       __exception__ __exception__ ldrloaddll  \\\n",
       "0                                         0.0   \n",
       "1                                         0.0   \n",
       "2                                         0.0   \n",
       "3                                         0.0   \n",
       "4                                         0.0   \n",
       "...                                       ...   \n",
       "13882                                     0.0   \n",
       "13883                                     0.0   \n",
       "13884                                     0.0   \n",
       "13885                                     0.0   \n",
       "13886                                     0.0   \n",
       "\n",
       "       __exception__ __exception__ ntallocatevirtualmemory  \\\n",
       "0                                                    0.0     \n",
       "1                                                    0.0     \n",
       "2                                                    0.0     \n",
       "3                                                    0.0     \n",
       "4                                                    0.0     \n",
       "...                                                  ...     \n",
       "13882                                                0.0     \n",
       "13883                                                0.0     \n",
       "13884                                                0.0     \n",
       "13885                                                0.0     \n",
       "13886                                                0.0     \n",
       "\n",
       "       __exception__ __exception__ ntclose  \\\n",
       "0                                      0.0   \n",
       "1                                      0.0   \n",
       "2                                      0.0   \n",
       "3                                      0.0   \n",
       "4                                      0.0   \n",
       "...                                    ...   \n",
       "13882                                  0.0   \n",
       "13883                                  0.0   \n",
       "13884                                  0.0   \n",
       "13885                                  0.0   \n",
       "13886                                  0.0   \n",
       "\n",
       "       __exception__ __exception__ ntdelayexecution  \\\n",
       "0                                               0.0   \n",
       "1                                               0.0   \n",
       "2                                               0.0   \n",
       "3                                               0.0   \n",
       "4                                               0.0   \n",
       "...                                             ...   \n",
       "13882                                           0.0   \n",
       "13883                                           0.0   \n",
       "13884                                           0.0   \n",
       "13885                                           0.0   \n",
       "13886                                           0.0   \n",
       "\n",
       "       __exception__ couninitialize  \\\n",
       "0                               0.0   \n",
       "1                               0.0   \n",
       "2                               0.0   \n",
       "3                               0.0   \n",
       "4                               0.0   \n",
       "...                             ...   \n",
       "13882                           0.0   \n",
       "13883                           0.0   \n",
       "13884                           0.0   \n",
       "13885                           0.0   \n",
       "13886                           0.0   \n",
       "\n",
       "       __exception__ couninitialize ldrunloaddll  __exception__ getsysteminfo  \\\n",
       "0                                            0.0                          0.0   \n",
       "1                                            0.0                          0.0   \n",
       "2                                            0.0                          0.0   \n",
       "3                                            0.0                          0.0   \n",
       "4                                            0.0                          0.0   \n",
       "...                                          ...                          ...   \n",
       "13882                                        0.0                          0.0   \n",
       "13883                                        0.0                          0.0   \n",
       "13884                                        0.0                          0.0   \n",
       "13885                                        0.0                          0.0   \n",
       "13886                                        0.0                          0.0   \n",
       "\n",
       "       ...  wsastartup ntallocatevirtualmemory ntfreevirtualmemory  \\\n",
       "0      ...                                           0.000721        \n",
       "1      ...                                           0.000000        \n",
       "2      ...                                           0.000000        \n",
       "3      ...                                           0.000000        \n",
       "4      ...                                           0.000000        \n",
       "...    ...                                                ...        \n",
       "13882  ...                                           0.000000        \n",
       "13883  ...                                           0.000000        \n",
       "13884  ...                                           0.000000        \n",
       "13885  ...                                           0.000000        \n",
       "13886  ...                                           0.000000        \n",
       "\n",
       "       wsastartup regopenkeyexa  wsastartup regopenkeyexa regqueryvalueexa  \\\n",
       "0                           0.0                                        0.0   \n",
       "1                           0.0                                        0.0   \n",
       "2                           0.0                                        0.0   \n",
       "3                           0.0                                        0.0   \n",
       "4                           0.0                                        0.0   \n",
       "...                         ...                                        ...   \n",
       "13882                       0.0                                        0.0   \n",
       "13883                       0.0                                        0.0   \n",
       "13884                       0.0                                        0.0   \n",
       "13885                       0.0                                        0.0   \n",
       "13886                       0.0                                        0.0   \n",
       "\n",
       "       wsastartup regopenkeyexa regsetvalueexa  wsastartup socket  \\\n",
       "0                                          0.0            0.00192   \n",
       "1                                          0.0            0.02643   \n",
       "2                                          0.0            0.00000   \n",
       "3                                          0.0            0.00000   \n",
       "4                                          0.0            0.00000   \n",
       "...                                        ...                ...   \n",
       "13882                                      0.0            0.00000   \n",
       "13883                                      0.0            0.00000   \n",
       "13884                                      0.0            0.00000   \n",
       "13885                                      0.0            0.00000   \n",
       "13886                                      0.0            0.00000   \n",
       "\n",
       "       wsastartup socket bind  wsastartup socket ntqueryattributesfile  \\\n",
       "0                    0.003114                                 0.000000   \n",
       "1                    0.000000                                 0.028152   \n",
       "2                    0.000000                                 0.000000   \n",
       "3                    0.000000                                 0.000000   \n",
       "4                    0.000000                                 0.000000   \n",
       "...                       ...                                      ...   \n",
       "13882                0.000000                                 0.000000   \n",
       "13883                0.000000                                 0.000000   \n",
       "13884                0.000000                                 0.000000   \n",
       "13885                0.000000                                 0.000000   \n",
       "13886                0.000000                                 0.000000   \n",
       "\n",
       "       wsastartup wsastartup  wsastartup wsastartup findresourceexw  \\\n",
       "0                   0.002741                                    0.0   \n",
       "1                   0.000000                                    0.0   \n",
       "2                   0.000000                                    0.0   \n",
       "3                   0.000000                                    0.0   \n",
       "4                   0.000000                                    0.0   \n",
       "...                      ...                                    ...   \n",
       "13882               0.000000                                    0.0   \n",
       "13883               0.000000                                    0.0   \n",
       "13884               0.000000                                    0.0   \n",
       "13885               0.000000                                    0.0   \n",
       "13886               0.000000                                    0.0   \n",
       "\n",
       "       wsastartup wsastartup socket  \n",
       "0                          0.000774  \n",
       "1                          0.000000  \n",
       "2                          0.000000  \n",
       "3                          0.000000  \n",
       "4                          0.000000  \n",
       "...                             ...  \n",
       "13882                      0.000000  \n",
       "13883                      0.000000  \n",
       "13884                      0.000000  \n",
       "13885                      0.000000  \n",
       "13886                      0.000000  \n",
       "\n",
       "[13887 rows x 7614 columns]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Give df_train_apis a fresh 0..n-1 index so its rows line up with df_train.\n",
    "# Check the row counts explicitly instead of sizing the index from the other frame.\n",
    "assert len(df_train_apis) == len(df_train), 'df_train_apis and df_train must have the same number of rows'\n",
    "df_train_apis.index = pd.RangeIndex(len(df_train_apis))\n",
    "df_train_apis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>file_id</th>\n",
       "      <th>label</th>\n",
       "      <th>api</th>\n",
       "      <th>tid</th>\n",
       "      <th>index</th>\n",
       "      <th>api_count</th>\n",
       "      <th>api_nunique</th>\n",
       "      <th>api_min</th>\n",
       "      <th>api_max</th>\n",
       "      <th>api_mean</th>\n",
       "      <th>...</th>\n",
       "      <th>tid_std</th>\n",
       "      <th>tid_ptp</th>\n",
       "      <th>index_count</th>\n",
       "      <th>index_nunique</th>\n",
       "      <th>index_min</th>\n",
       "      <th>index_max</th>\n",
       "      <th>index_mean</th>\n",
       "      <th>index_median</th>\n",
       "      <th>index_std</th>\n",
       "      <th>index_ptp</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>135</td>\n",
       "      <td>2488</td>\n",
       "      <td>0</td>\n",
       "      <td>6786</td>\n",
       "      <td>116</td>\n",
       "      <td>6</td>\n",
       "      <td>298</td>\n",
       "      <td>171.965223</td>\n",
       "      <td>...</td>\n",
       "      <td>83.881299</td>\n",
       "      <td>324</td>\n",
       "      <td>6786</td>\n",
       "      <td>5001</td>\n",
       "      <td>0</td>\n",
       "      <td>5000</td>\n",
       "      <td>2000.806955</td>\n",
       "      <td>1607.5</td>\n",
       "      <td>1510.694221</td>\n",
       "      <td>5000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>95</td>\n",
       "      <td>2320</td>\n",
       "      <td>0</td>\n",
       "      <td>816</td>\n",
       "      <td>30</td>\n",
       "      <td>89</td>\n",
       "      <td>298</td>\n",
       "      <td>159.696078</td>\n",
       "      <td>...</td>\n",
       "      <td>101.506783</td>\n",
       "      <td>284</td>\n",
       "      <td>816</td>\n",
       "      <td>204</td>\n",
       "      <td>0</td>\n",
       "      <td>203</td>\n",
       "      <td>101.500000</td>\n",
       "      <td>101.5</td>\n",
       "      <td>58.925137</td>\n",
       "      <td>203</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>151</td>\n",
       "      <td>2208</td>\n",
       "      <td>0</td>\n",
       "      <td>463</td>\n",
       "      <td>42</td>\n",
       "      <td>9</td>\n",
       "      <td>258</td>\n",
       "      <td>164.948164</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0</td>\n",
       "      <td>463</td>\n",
       "      <td>463</td>\n",
       "      <td>0</td>\n",
       "      <td>462</td>\n",
       "      <td>231.000000</td>\n",
       "      <td>231.0</td>\n",
       "      <td>133.800847</td>\n",
       "      <td>462</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>95</td>\n",
       "      <td>2284</td>\n",
       "      <td>0</td>\n",
       "      <td>2046</td>\n",
       "      <td>51</td>\n",
       "      <td>9</td>\n",
       "      <td>257</td>\n",
       "      <td>154.939883</td>\n",
       "      <td>...</td>\n",
       "      <td>150.460506</td>\n",
       "      <td>696</td>\n",
       "      <td>2046</td>\n",
       "      <td>1028</td>\n",
       "      <td>0</td>\n",
       "      <td>1027</td>\n",
       "      <td>511.012219</td>\n",
       "      <td>511.0</td>\n",
       "      <td>295.407885</td>\n",
       "      <td>1027</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>249</td>\n",
       "      <td>2500</td>\n",
       "      <td>0</td>\n",
       "      <td>10002</td>\n",
       "      <td>65</td>\n",
       "      <td>6</td>\n",
       "      <td>254</td>\n",
       "      <td>201.893421</td>\n",
       "      <td>...</td>\n",
       "      <td>49.556301</td>\n",
       "      <td>176</td>\n",
       "      <td>10002</td>\n",
       "      <td>5001</td>\n",
       "      <td>0</td>\n",
       "      <td>5000</td>\n",
       "      <td>2500.000000</td>\n",
       "      <td>2500.0</td>\n",
       "      <td>1443.736493</td>\n",
       "      <td>5000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13882</th>\n",
       "      <td>13883</td>\n",
       "      <td>2</td>\n",
       "      <td>95</td>\n",
       "      <td>100</td>\n",
       "      <td>0</td>\n",
       "      <td>178221</td>\n",
       "      <td>71</td>\n",
       "      <td>6</td>\n",
       "      <td>279</td>\n",
       "      <td>156.643100</td>\n",
       "      <td>...</td>\n",
       "      <td>1405.045515</td>\n",
       "      <td>6468</td>\n",
       "      <td>178221</td>\n",
       "      <td>5001</td>\n",
       "      <td>0</td>\n",
       "      <td>5000</td>\n",
       "      <td>401.480987</td>\n",
       "      <td>47.0</td>\n",
       "      <td>1008.636040</td>\n",
       "      <td>5000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13883</th>\n",
       "      <td>13884</td>\n",
       "      <td>5</td>\n",
       "      <td>95</td>\n",
       "      <td>2592</td>\n",
       "      <td>0</td>\n",
       "      <td>1319</td>\n",
       "      <td>39</td>\n",
       "      <td>6</td>\n",
       "      <td>279</td>\n",
       "      <td>163.025019</td>\n",
       "      <td>...</td>\n",
       "      <td>4.295386</td>\n",
       "      <td>156</td>\n",
       "      <td>1319</td>\n",
       "      <td>1319</td>\n",
       "      <td>0</td>\n",
       "      <td>1318</td>\n",
       "      <td>659.000000</td>\n",
       "      <td>659.0</td>\n",
       "      <td>380.906813</td>\n",
       "      <td>1318</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13884</th>\n",
       "      <td>13885</td>\n",
       "      <td>0</td>\n",
       "      <td>151</td>\n",
       "      <td>2240</td>\n",
       "      <td>0</td>\n",
       "      <td>1033</td>\n",
       "      <td>71</td>\n",
       "      <td>8</td>\n",
       "      <td>259</td>\n",
       "      <td>174.896418</td>\n",
       "      <td>...</td>\n",
       "      <td>33.152020</td>\n",
       "      <td>504</td>\n",
       "      <td>1033</td>\n",
       "      <td>1033</td>\n",
       "      <td>0</td>\n",
       "      <td>1032</td>\n",
       "      <td>516.000000</td>\n",
       "      <td>516.0</td>\n",
       "      <td>298.345717</td>\n",
       "      <td>1032</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13885</th>\n",
       "      <td>13886</td>\n",
       "      <td>1</td>\n",
       "      <td>95</td>\n",
       "      <td>2324</td>\n",
       "      <td>0</td>\n",
       "      <td>5316</td>\n",
       "      <td>80</td>\n",
       "      <td>9</td>\n",
       "      <td>281</td>\n",
       "      <td>168.313017</td>\n",
       "      <td>...</td>\n",
       "      <td>154.796790</td>\n",
       "      <td>512</td>\n",
       "      <td>5316</td>\n",
       "      <td>2503</td>\n",
       "      <td>0</td>\n",
       "      <td>2502</td>\n",
       "      <td>1173.050414</td>\n",
       "      <td>1165.5</td>\n",
       "      <td>755.545651</td>\n",
       "      <td>2502</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13886</th>\n",
       "      <td>13887</td>\n",
       "      <td>2</td>\n",
       "      <td>135</td>\n",
       "      <td>2336</td>\n",
       "      <td>0</td>\n",
       "      <td>623</td>\n",
       "      <td>37</td>\n",
       "      <td>11</td>\n",
       "      <td>277</td>\n",
       "      <td>139.784912</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0</td>\n",
       "      <td>623</td>\n",
       "      <td>623</td>\n",
       "      <td>0</td>\n",
       "      <td>622</td>\n",
       "      <td>311.000000</td>\n",
       "      <td>311.0</td>\n",
       "      <td>179.988889</td>\n",
       "      <td>622</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>13887 rows × 29 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       file_id  label  api   tid  index  api_count  api_nunique  api_min  \\\n",
       "0            1      5  135  2488      0       6786          116        6   \n",
       "1            2      2   95  2320      0        816           30       89   \n",
       "2            3      0  151  2208      0        463           42        9   \n",
       "3            4      0   95  2284      0       2046           51        9   \n",
       "4            5      0  249  2500      0      10002           65        6   \n",
       "...        ...    ...  ...   ...    ...        ...          ...      ...   \n",
       "13882    13883      2   95   100      0     178221           71        6   \n",
       "13883    13884      5   95  2592      0       1319           39        6   \n",
       "13884    13885      0  151  2240      0       1033           71        8   \n",
       "13885    13886      1   95  2324      0       5316           80        9   \n",
       "13886    13887      2  135  2336      0        623           37       11   \n",
       "\n",
       "       api_max    api_mean  ...      tid_std  tid_ptp  index_count  \\\n",
       "0          298  171.965223  ...    83.881299      324         6786   \n",
       "1          298  159.696078  ...   101.506783      284          816   \n",
       "2          258  164.948164  ...     0.000000        0          463   \n",
       "3          257  154.939883  ...   150.460506      696         2046   \n",
       "4          254  201.893421  ...    49.556301      176        10002   \n",
       "...        ...         ...  ...          ...      ...          ...   \n",
       "13882      279  156.643100  ...  1405.045515     6468       178221   \n",
       "13883      279  163.025019  ...     4.295386      156         1319   \n",
       "13884      259  174.896418  ...    33.152020      504         1033   \n",
       "13885      281  168.313017  ...   154.796790      512         5316   \n",
       "13886      277  139.784912  ...     0.000000        0          623   \n",
       "\n",
       "       index_nunique  index_min  index_max   index_mean  index_median  \\\n",
       "0               5001          0       5000  2000.806955        1607.5   \n",
       "1                204          0        203   101.500000         101.5   \n",
       "2                463          0        462   231.000000         231.0   \n",
       "3               1028          0       1027   511.012219         511.0   \n",
       "4               5001          0       5000  2500.000000        2500.0   \n",
       "...              ...        ...        ...          ...           ...   \n",
       "13882           5001          0       5000   401.480987          47.0   \n",
       "13883           1319          0       1318   659.000000         659.0   \n",
       "13884           1033          0       1032   516.000000         516.0   \n",
       "13885           2503          0       2502  1173.050414        1165.5   \n",
       "13886            623          0        622   311.000000         311.0   \n",
       "\n",
       "         index_std  index_ptp  \n",
       "0      1510.694221       5000  \n",
       "1        58.925137        203  \n",
       "2       133.800847        462  \n",
       "3       295.407885       1027  \n",
       "4      1443.736493       5000  \n",
       "...            ...        ...  \n",
       "13882  1008.636040       5000  \n",
       "13883   380.906813       1318  \n",
       "13884   298.345717       1032  \n",
       "13885   755.545651       2502  \n",
       "13886   179.988889        622  \n",
       "\n",
       "[13887 rows x 29 columns]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# The frames are merged later on, so df_train also gets a plain 0..n-1 index here.\n",
    "df_train.index = pd.RangeIndex(len(df_train))\n",
    "df_train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>file_id</th>\n",
       "      <th>api</th>\n",
       "      <th>tid</th>\n",
       "      <th>index</th>\n",
       "      <th>api_count</th>\n",
       "      <th>api_nunique</th>\n",
       "      <th>api_min</th>\n",
       "      <th>api_max</th>\n",
       "      <th>api_mean</th>\n",
       "      <th>api_median</th>\n",
       "      <th>...</th>\n",
       "      <th>tid_std</th>\n",
       "      <th>tid_ptp</th>\n",
       "      <th>index_count</th>\n",
       "      <th>index_nunique</th>\n",
       "      <th>index_min</th>\n",
       "      <th>index_max</th>\n",
       "      <th>index_mean</th>\n",
       "      <th>index_median</th>\n",
       "      <th>index_std</th>\n",
       "      <th>index_ptp</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>226</td>\n",
       "      <td>2332</td>\n",
       "      <td>0</td>\n",
       "      <td>97</td>\n",
       "      <td>15</td>\n",
       "      <td>13</td>\n",
       "      <td>262</td>\n",
       "      <td>155.989691</td>\n",
       "      <td>152.0</td>\n",
       "      <td>...</td>\n",
       "      <td>57.218548</td>\n",
       "      <td>236</td>\n",
       "      <td>97</td>\n",
       "      <td>31</td>\n",
       "      <td>0</td>\n",
       "      <td>30</td>\n",
       "      <td>14.443299</td>\n",
       "      <td>14.0</td>\n",
       "      <td>9.210466</td>\n",
       "      <td>30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>226</td>\n",
       "      <td>2472</td>\n",
       "      <td>0</td>\n",
       "      <td>1361</td>\n",
       "      <td>40</td>\n",
       "      <td>6</td>\n",
       "      <td>261</td>\n",
       "      <td>138.025716</td>\n",
       "      <td>138.0</td>\n",
       "      <td>...</td>\n",
       "      <td>104.399149</td>\n",
       "      <td>276</td>\n",
       "      <td>1361</td>\n",
       "      <td>681</td>\n",
       "      <td>0</td>\n",
       "      <td>680</td>\n",
       "      <td>339.750184</td>\n",
       "      <td>340.0</td>\n",
       "      <td>196.515744</td>\n",
       "      <td>680</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>95</td>\n",
       "      <td>2344</td>\n",
       "      <td>0</td>\n",
       "      <td>16</td>\n",
       "      <td>9</td>\n",
       "      <td>16</td>\n",
       "      <td>257</td>\n",
       "      <td>111.375000</td>\n",
       "      <td>134.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0</td>\n",
       "      <td>16</td>\n",
       "      <td>16</td>\n",
       "      <td>0</td>\n",
       "      <td>15</td>\n",
       "      <td>7.500000</td>\n",
       "      <td>7.5</td>\n",
       "      <td>4.760952</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>135</td>\n",
       "      <td>2452</td>\n",
       "      <td>0</td>\n",
       "      <td>193</td>\n",
       "      <td>34</td>\n",
       "      <td>13</td>\n",
       "      <td>262</td>\n",
       "      <td>172.217617</td>\n",
       "      <td>170.0</td>\n",
       "      <td>...</td>\n",
       "      <td>50.951508</td>\n",
       "      <td>132</td>\n",
       "      <td>193</td>\n",
       "      <td>193</td>\n",
       "      <td>0</td>\n",
       "      <td>192</td>\n",
       "      <td>96.000000</td>\n",
       "      <td>96.0</td>\n",
       "      <td>55.858452</td>\n",
       "      <td>192</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>95</td>\n",
       "      <td>2332</td>\n",
       "      <td>0</td>\n",
       "      <td>803</td>\n",
       "      <td>34</td>\n",
       "      <td>16</td>\n",
       "      <td>261</td>\n",
       "      <td>168.490660</td>\n",
       "      <td>153.0</td>\n",
       "      <td>...</td>\n",
       "      <td>201.826813</td>\n",
       "      <td>448</td>\n",
       "      <td>803</td>\n",
       "      <td>268</td>\n",
       "      <td>0</td>\n",
       "      <td>267</td>\n",
       "      <td>133.333748</td>\n",
       "      <td>133.0</td>\n",
       "      <td>77.317048</td>\n",
       "      <td>267</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12950</th>\n",
       "      <td>12951</td>\n",
       "      <td>151</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>289</td>\n",
       "      <td>37</td>\n",
       "      <td>9</td>\n",
       "      <td>269</td>\n",
       "      <td>140.536332</td>\n",
       "      <td>151.0</td>\n",
       "      <td>...</td>\n",
       "      <td>75.402526</td>\n",
       "      <td>336</td>\n",
       "      <td>289</td>\n",
       "      <td>145</td>\n",
       "      <td>0</td>\n",
       "      <td>144</td>\n",
       "      <td>71.750865</td>\n",
       "      <td>72.0</td>\n",
       "      <td>41.786414</td>\n",
       "      <td>144</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12951</th>\n",
       "      <td>12952</td>\n",
       "      <td>151</td>\n",
       "      <td>2264</td>\n",
       "      <td>0</td>\n",
       "      <td>112</td>\n",
       "      <td>28</td>\n",
       "      <td>56</td>\n",
       "      <td>261</td>\n",
       "      <td>163.669643</td>\n",
       "      <td>152.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0</td>\n",
       "      <td>112</td>\n",
       "      <td>112</td>\n",
       "      <td>0</td>\n",
       "      <td>111</td>\n",
       "      <td>55.500000</td>\n",
       "      <td>55.5</td>\n",
       "      <td>32.475632</td>\n",
       "      <td>111</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12952</th>\n",
       "      <td>12953</td>\n",
       "      <td>135</td>\n",
       "      <td>2324</td>\n",
       "      <td>0</td>\n",
       "      <td>5095</td>\n",
       "      <td>72</td>\n",
       "      <td>6</td>\n",
       "      <td>286</td>\n",
       "      <td>200.063199</td>\n",
       "      <td>214.0</td>\n",
       "      <td>...</td>\n",
       "      <td>196.695730</td>\n",
       "      <td>560</td>\n",
       "      <td>5095</td>\n",
       "      <td>1464</td>\n",
       "      <td>0</td>\n",
       "      <td>1463</td>\n",
       "      <td>538.423749</td>\n",
       "      <td>454.0</td>\n",
       "      <td>393.605016</td>\n",
       "      <td>1463</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12953</th>\n",
       "      <td>12954</td>\n",
       "      <td>135</td>\n",
       "      <td>2424</td>\n",
       "      <td>0</td>\n",
       "      <td>2951</td>\n",
       "      <td>65</td>\n",
       "      <td>9</td>\n",
       "      <td>298</td>\n",
       "      <td>191.007794</td>\n",
       "      <td>139.0</td>\n",
       "      <td>...</td>\n",
       "      <td>126.124152</td>\n",
       "      <td>276</td>\n",
       "      <td>2951</td>\n",
       "      <td>1445</td>\n",
       "      <td>0</td>\n",
       "      <td>1444</td>\n",
       "      <td>596.701796</td>\n",
       "      <td>555.0</td>\n",
       "      <td>397.358069</td>\n",
       "      <td>1444</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12954</th>\n",
       "      <td>12955</td>\n",
       "      <td>135</td>\n",
       "      <td>2500</td>\n",
       "      <td>0</td>\n",
       "      <td>2038</td>\n",
       "      <td>54</td>\n",
       "      <td>13</td>\n",
       "      <td>284</td>\n",
       "      <td>208.845927</td>\n",
       "      <td>266.0</td>\n",
       "      <td>...</td>\n",
       "      <td>78.912837</td>\n",
       "      <td>240</td>\n",
       "      <td>2038</td>\n",
       "      <td>1451</td>\n",
       "      <td>0</td>\n",
       "      <td>1450</td>\n",
       "      <td>560.742885</td>\n",
       "      <td>431.5</td>\n",
       "      <td>440.983364</td>\n",
       "      <td>1450</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>12955 rows × 28 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       file_id  api   tid  index  api_count  api_nunique  api_min  api_max  \\\n",
       "0            1  226  2332      0         97           15       13      262   \n",
       "1            2  226  2472      0       1361           40        6      261   \n",
       "2            3   95  2344      0         16            9       16      257   \n",
       "3            4  135  2452      0        193           34       13      262   \n",
       "4            5   95  2332      0        803           34       16      261   \n",
       "...        ...  ...   ...    ...        ...          ...      ...      ...   \n",
       "12950    12951  151  2644      0        289           37        9      269   \n",
       "12951    12952  151  2264      0        112           28       56      261   \n",
       "12952    12953  135  2324      0       5095           72        6      286   \n",
       "12953    12954  135  2424      0       2951           65        9      298   \n",
       "12954    12955  135  2500      0       2038           54       13      284   \n",
       "\n",
       "         api_mean  api_median  ...     tid_std  tid_ptp  index_count  \\\n",
       "0      155.989691       152.0  ...   57.218548      236           97   \n",
       "1      138.025716       138.0  ...  104.399149      276         1361   \n",
       "2      111.375000       134.0  ...    0.000000        0           16   \n",
       "3      172.217617       170.0  ...   50.951508      132          193   \n",
       "4      168.490660       153.0  ...  201.826813      448          803   \n",
       "...           ...         ...  ...         ...      ...          ...   \n",
       "12950  140.536332       151.0  ...   75.402526      336          289   \n",
       "12951  163.669643       152.0  ...    0.000000        0          112   \n",
       "12952  200.063199       214.0  ...  196.695730      560         5095   \n",
       "12953  191.007794       139.0  ...  126.124152      276         2951   \n",
       "12954  208.845927       266.0  ...   78.912837      240         2038   \n",
       "\n",
       "       index_nunique  index_min  index_max  index_mean  index_median  \\\n",
       "0                 31          0         30   14.443299          14.0   \n",
       "1                681          0        680  339.750184         340.0   \n",
       "2                 16          0         15    7.500000           7.5   \n",
       "3                193          0        192   96.000000          96.0   \n",
       "4                268          0        267  133.333748         133.0   \n",
       "...              ...        ...        ...         ...           ...   \n",
       "12950            145          0        144   71.750865          72.0   \n",
       "12951            112          0        111   55.500000          55.5   \n",
       "12952           1464          0       1463  538.423749         454.0   \n",
       "12953           1445          0       1444  596.701796         555.0   \n",
       "12954           1451          0       1450  560.742885         431.5   \n",
       "\n",
       "        index_std  index_ptp  \n",
       "0        9.210466         30  \n",
       "1      196.515744        680  \n",
       "2        4.760952         15  \n",
       "3       55.858452        192  \n",
       "4       77.317048        267  \n",
       "...           ...        ...  \n",
       "12950   41.786414        144  \n",
       "12951   32.475632        111  \n",
       "12952  393.605016       1463  \n",
       "12953  397.358069       1444  \n",
       "12954  440.983364       1450  \n",
       "\n",
       "[12955 rows x 28 columns]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Same re-indexing for the test frame: replace its index with 0..n-1.\n",
    "df_test.index = pd.RangeIndex(len(df_test))\n",
    "df_test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>__exception__</th>\n",
       "      <th>__exception__ __exception__</th>\n",
       "      <th>__exception__ __exception__ __exception__</th>\n",
       "      <th>__exception__ __exception__ ldrloaddll</th>\n",
       "      <th>__exception__ __exception__ ntallocatevirtualmemory</th>\n",
       "      <th>__exception__ __exception__ ntclose</th>\n",
       "      <th>__exception__ __exception__ ntdelayexecution</th>\n",
       "      <th>__exception__ couninitialize</th>\n",
       "      <th>__exception__ couninitialize ldrunloaddll</th>\n",
       "      <th>__exception__ getsysteminfo</th>\n",
       "      <th>...</th>\n",
       "      <th>wsastartup ntallocatevirtualmemory ntfreevirtualmemory</th>\n",
       "      <th>wsastartup regopenkeyexa</th>\n",
       "      <th>wsastartup regopenkeyexa regqueryvalueexa</th>\n",
       "      <th>wsastartup regopenkeyexa regsetvalueexa</th>\n",
       "      <th>wsastartup socket</th>\n",
       "      <th>wsastartup socket bind</th>\n",
       "      <th>wsastartup socket ntqueryattributesfile</th>\n",
       "      <th>wsastartup wsastartup</th>\n",
       "      <th>wsastartup wsastartup findresourceexw</th>\n",
       "      <th>wsastartup wsastartup socket</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.000291</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000721</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00192</td>\n",
       "      <td>0.003114</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.002741</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000774</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.02643</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.028152</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12950</th>\n",
       "      <td>0.000343</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12951</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12952</th>\n",
       "      <td>0.000351</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12953</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12954</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>12955 rows × 7614 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       __exception__  __exception__ __exception__  \\\n",
       "0           0.000291                          0.0   \n",
       "1           0.000000                          0.0   \n",
       "2           0.000000                          0.0   \n",
       "3           0.000000                          0.0   \n",
       "4           0.000000                          0.0   \n",
       "...              ...                          ...   \n",
       "12950       0.000343                          0.0   \n",
       "12951       0.000000                          0.0   \n",
       "12952       0.000351                          0.0   \n",
       "12953       0.000000                          0.0   \n",
       "12954       0.000000                          0.0   \n",
       "\n",
       "       __exception__ __exception__ __exception__  \\\n",
       "0                                            0.0   \n",
       "1                                            0.0   \n",
       "2                                            0.0   \n",
       "3                                            0.0   \n",
       "4                                            0.0   \n",
       "...                                          ...   \n",
       "12950                                        0.0   \n",
       "12951                                        0.0   \n",
       "12952                                        0.0   \n",
       "12953                                        0.0   \n",
       "12954                                        0.0   \n",
       "\n",
       "       __exception__ __exception__ ldrloaddll  \\\n",
       "0                                         0.0   \n",
       "1                                         0.0   \n",
       "2                                         0.0   \n",
       "3                                         0.0   \n",
       "4                                         0.0   \n",
       "...                                       ...   \n",
       "12950                                     0.0   \n",
       "12951                                     0.0   \n",
       "12952                                     0.0   \n",
       "12953                                     0.0   \n",
       "12954                                     0.0   \n",
       "\n",
       "       __exception__ __exception__ ntallocatevirtualmemory  \\\n",
       "0                                                    0.0     \n",
       "1                                                    0.0     \n",
       "2                                                    0.0     \n",
       "3                                                    0.0     \n",
       "4                                                    0.0     \n",
       "...                                                  ...     \n",
       "12950                                                0.0     \n",
       "12951                                                0.0     \n",
       "12952                                                0.0     \n",
       "12953                                                0.0     \n",
       "12954                                                0.0     \n",
       "\n",
       "       __exception__ __exception__ ntclose  \\\n",
       "0                                      0.0   \n",
       "1                                      0.0   \n",
       "2                                      0.0   \n",
       "3                                      0.0   \n",
       "4                                      0.0   \n",
       "...                                    ...   \n",
       "12950                                  0.0   \n",
       "12951                                  0.0   \n",
       "12952                                  0.0   \n",
       "12953                                  0.0   \n",
       "12954                                  0.0   \n",
       "\n",
       "       __exception__ __exception__ ntdelayexecution  \\\n",
       "0                                               0.0   \n",
       "1                                               0.0   \n",
       "2                                               0.0   \n",
       "3                                               0.0   \n",
       "4                                               0.0   \n",
       "...                                             ...   \n",
       "12950                                           0.0   \n",
       "12951                                           0.0   \n",
       "12952                                           0.0   \n",
       "12953                                           0.0   \n",
       "12954                                           0.0   \n",
       "\n",
       "       __exception__ couninitialize  \\\n",
       "0                               0.0   \n",
       "1                               0.0   \n",
       "2                               0.0   \n",
       "3                               0.0   \n",
       "4                               0.0   \n",
       "...                             ...   \n",
       "12950                           0.0   \n",
       "12951                           0.0   \n",
       "12952                           0.0   \n",
       "12953                           0.0   \n",
       "12954                           0.0   \n",
       "\n",
       "       __exception__ couninitialize ldrunloaddll  __exception__ getsysteminfo  \\\n",
       "0                                            0.0                          0.0   \n",
       "1                                            0.0                          0.0   \n",
       "2                                            0.0                          0.0   \n",
       "3                                            0.0                          0.0   \n",
       "4                                            0.0                          0.0   \n",
       "...                                          ...                          ...   \n",
       "12950                                        0.0                          0.0   \n",
       "12951                                        0.0                          0.0   \n",
       "12952                                        0.0                          0.0   \n",
       "12953                                        0.0                          0.0   \n",
       "12954                                        0.0                          0.0   \n",
       "\n",
       "       ...  wsastartup ntallocatevirtualmemory ntfreevirtualmemory  \\\n",
       "0      ...                                           0.000721        \n",
       "1      ...                                           0.000000        \n",
       "2      ...                                           0.000000        \n",
       "3      ...                                           0.000000        \n",
       "4      ...                                           0.000000        \n",
       "...    ...                                                ...        \n",
       "12950  ...                                           0.000000        \n",
       "12951  ...                                           0.000000        \n",
       "12952  ...                                           0.000000        \n",
       "12953  ...                                           0.000000        \n",
       "12954  ...                                           0.000000        \n",
       "\n",
       "       wsastartup regopenkeyexa  wsastartup regopenkeyexa regqueryvalueexa  \\\n",
       "0                           0.0                                        0.0   \n",
       "1                           0.0                                        0.0   \n",
       "2                           0.0                                        0.0   \n",
       "3                           0.0                                        0.0   \n",
       "4                           0.0                                        0.0   \n",
       "...                         ...                                        ...   \n",
       "12950                       0.0                                        0.0   \n",
       "12951                       0.0                                        0.0   \n",
       "12952                       0.0                                        0.0   \n",
       "12953                       0.0                                        0.0   \n",
       "12954                       0.0                                        0.0   \n",
       "\n",
       "       wsastartup regopenkeyexa regsetvalueexa  wsastartup socket  \\\n",
       "0                                          0.0            0.00192   \n",
       "1                                          0.0            0.02643   \n",
       "2                                          0.0            0.00000   \n",
       "3                                          0.0            0.00000   \n",
       "4                                          0.0            0.00000   \n",
       "...                                        ...                ...   \n",
       "12950                                      0.0            0.00000   \n",
       "12951                                      0.0            0.00000   \n",
       "12952                                      0.0            0.00000   \n",
       "12953                                      0.0            0.00000   \n",
       "12954                                      0.0            0.00000   \n",
       "\n",
       "       wsastartup socket bind  wsastartup socket ntqueryattributesfile  \\\n",
       "0                    0.003114                                 0.000000   \n",
       "1                    0.000000                                 0.028152   \n",
       "2                    0.000000                                 0.000000   \n",
       "3                    0.000000                                 0.000000   \n",
       "4                    0.000000                                 0.000000   \n",
       "...                       ...                                      ...   \n",
       "12950                0.000000                                 0.000000   \n",
       "12951                0.000000                                 0.000000   \n",
       "12952                0.000000                                 0.000000   \n",
       "12953                0.000000                                 0.000000   \n",
       "12954                0.000000                                 0.000000   \n",
       "\n",
       "       wsastartup wsastartup  wsastartup wsastartup findresourceexw  \\\n",
       "0                   0.002741                                    0.0   \n",
       "1                   0.000000                                    0.0   \n",
       "2                   0.000000                                    0.0   \n",
       "3                   0.000000                                    0.0   \n",
       "4                   0.000000                                    0.0   \n",
       "...                      ...                                    ...   \n",
       "12950               0.000000                                    0.0   \n",
       "12951               0.000000                                    0.0   \n",
       "12952               0.000000                                    0.0   \n",
       "12953               0.000000                                    0.0   \n",
       "12954               0.000000                                    0.0   \n",
       "\n",
       "       wsastartup wsastartup socket  \n",
       "0                          0.000774  \n",
       "1                          0.000000  \n",
       "2                          0.000000  \n",
       "3                          0.000000  \n",
       "4                          0.000000  \n",
       "...                             ...  \n",
       "12950                      0.000000  \n",
       "12951                      0.000000  \n",
       "12952                      0.000000  \n",
       "12953                      0.000000  \n",
       "12954                      0.000000  \n",
       "\n",
       "[12955 rows x 7614 columns]"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_test_apis.index=range(len(df_test))\n",
    "df_test_apis#重新修改了一下索引"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>file_id</th>\n",
       "      <th>label</th>\n",
       "      <th>api</th>\n",
       "      <th>tid</th>\n",
       "      <th>index</th>\n",
       "      <th>api_count</th>\n",
       "      <th>api_nunique</th>\n",
       "      <th>api_min</th>\n",
       "      <th>api_max</th>\n",
       "      <th>api_mean</th>\n",
       "      <th>...</th>\n",
       "      <th>wsastartup ntallocatevirtualmemory ntfreevirtualmemory</th>\n",
       "      <th>wsastartup regopenkeyexa</th>\n",
       "      <th>wsastartup regopenkeyexa regqueryvalueexa</th>\n",
       "      <th>wsastartup regopenkeyexa regsetvalueexa</th>\n",
       "      <th>wsastartup socket</th>\n",
       "      <th>wsastartup socket bind</th>\n",
       "      <th>wsastartup socket ntqueryattributesfile</th>\n",
       "      <th>wsastartup wsastartup</th>\n",
       "      <th>wsastartup wsastartup findresourceexw</th>\n",
       "      <th>wsastartup wsastartup socket</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>135</td>\n",
       "      <td>2488</td>\n",
       "      <td>0</td>\n",
       "      <td>6786</td>\n",
       "      <td>116</td>\n",
       "      <td>6</td>\n",
       "      <td>298</td>\n",
       "      <td>171.965223</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000721</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00192</td>\n",
       "      <td>0.003114</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.002741</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000774</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>95</td>\n",
       "      <td>2320</td>\n",
       "      <td>0</td>\n",
       "      <td>816</td>\n",
       "      <td>30</td>\n",
       "      <td>89</td>\n",
       "      <td>298</td>\n",
       "      <td>159.696078</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.02643</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.028152</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>151</td>\n",
       "      <td>2208</td>\n",
       "      <td>0</td>\n",
       "      <td>463</td>\n",
       "      <td>42</td>\n",
       "      <td>9</td>\n",
       "      <td>258</td>\n",
       "      <td>164.948164</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>95</td>\n",
       "      <td>2284</td>\n",
       "      <td>0</td>\n",
       "      <td>2046</td>\n",
       "      <td>51</td>\n",
       "      <td>9</td>\n",
       "      <td>257</td>\n",
       "      <td>154.939883</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>249</td>\n",
       "      <td>2500</td>\n",
       "      <td>0</td>\n",
       "      <td>10002</td>\n",
       "      <td>65</td>\n",
       "      <td>6</td>\n",
       "      <td>254</td>\n",
       "      <td>201.893421</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13882</th>\n",
       "      <td>13883</td>\n",
       "      <td>2</td>\n",
       "      <td>95</td>\n",
       "      <td>100</td>\n",
       "      <td>0</td>\n",
       "      <td>178221</td>\n",
       "      <td>71</td>\n",
       "      <td>6</td>\n",
       "      <td>279</td>\n",
       "      <td>156.643100</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13883</th>\n",
       "      <td>13884</td>\n",
       "      <td>5</td>\n",
       "      <td>95</td>\n",
       "      <td>2592</td>\n",
       "      <td>0</td>\n",
       "      <td>1319</td>\n",
       "      <td>39</td>\n",
       "      <td>6</td>\n",
       "      <td>279</td>\n",
       "      <td>163.025019</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13884</th>\n",
       "      <td>13885</td>\n",
       "      <td>0</td>\n",
       "      <td>151</td>\n",
       "      <td>2240</td>\n",
       "      <td>0</td>\n",
       "      <td>1033</td>\n",
       "      <td>71</td>\n",
       "      <td>8</td>\n",
       "      <td>259</td>\n",
       "      <td>174.896418</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13885</th>\n",
       "      <td>13886</td>\n",
       "      <td>1</td>\n",
       "      <td>95</td>\n",
       "      <td>2324</td>\n",
       "      <td>0</td>\n",
       "      <td>5316</td>\n",
       "      <td>80</td>\n",
       "      <td>9</td>\n",
       "      <td>281</td>\n",
       "      <td>168.313017</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13886</th>\n",
       "      <td>13887</td>\n",
       "      <td>2</td>\n",
       "      <td>135</td>\n",
       "      <td>2336</td>\n",
       "      <td>0</td>\n",
       "      <td>623</td>\n",
       "      <td>37</td>\n",
       "      <td>11</td>\n",
       "      <td>277</td>\n",
       "      <td>139.784912</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>13887 rows × 7643 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       file_id  label  api   tid  index  api_count  api_nunique  api_min  \\\n",
       "0            1      5  135  2488      0       6786          116        6   \n",
       "1            2      2   95  2320      0        816           30       89   \n",
       "2            3      0  151  2208      0        463           42        9   \n",
       "3            4      0   95  2284      0       2046           51        9   \n",
       "4            5      0  249  2500      0      10002           65        6   \n",
       "...        ...    ...  ...   ...    ...        ...          ...      ...   \n",
       "13882    13883      2   95   100      0     178221           71        6   \n",
       "13883    13884      5   95  2592      0       1319           39        6   \n",
       "13884    13885      0  151  2240      0       1033           71        8   \n",
       "13885    13886      1   95  2324      0       5316           80        9   \n",
       "13886    13887      2  135  2336      0        623           37       11   \n",
       "\n",
       "       api_max    api_mean  ...  \\\n",
       "0          298  171.965223  ...   \n",
       "1          298  159.696078  ...   \n",
       "2          258  164.948164  ...   \n",
       "3          257  154.939883  ...   \n",
       "4          254  201.893421  ...   \n",
       "...        ...         ...  ...   \n",
       "13882      279  156.643100  ...   \n",
       "13883      279  163.025019  ...   \n",
       "13884      259  174.896418  ...   \n",
       "13885      281  168.313017  ...   \n",
       "13886      277  139.784912  ...   \n",
       "\n",
       "       wsastartup ntallocatevirtualmemory ntfreevirtualmemory  \\\n",
       "0                                               0.000721        \n",
       "1                                               0.000000        \n",
       "2                                               0.000000        \n",
       "3                                               0.000000        \n",
       "4                                               0.000000        \n",
       "...                                                  ...        \n",
       "13882                                           0.000000        \n",
       "13883                                           0.000000        \n",
       "13884                                           0.000000        \n",
       "13885                                           0.000000        \n",
       "13886                                           0.000000        \n",
       "\n",
       "       wsastartup regopenkeyexa  wsastartup regopenkeyexa regqueryvalueexa  \\\n",
       "0                           0.0                                        0.0   \n",
       "1                           0.0                                        0.0   \n",
       "2                           0.0                                        0.0   \n",
       "3                           0.0                                        0.0   \n",
       "4                           0.0                                        0.0   \n",
       "...                         ...                                        ...   \n",
       "13882                       0.0                                        0.0   \n",
       "13883                       0.0                                        0.0   \n",
       "13884                       0.0                                        0.0   \n",
       "13885                       0.0                                        0.0   \n",
       "13886                       0.0                                        0.0   \n",
       "\n",
       "       wsastartup regopenkeyexa regsetvalueexa  wsastartup socket  \\\n",
       "0                                          0.0            0.00192   \n",
       "1                                          0.0            0.02643   \n",
       "2                                          0.0            0.00000   \n",
       "3                                          0.0            0.00000   \n",
       "4                                          0.0            0.00000   \n",
       "...                                        ...                ...   \n",
       "13882                                      0.0            0.00000   \n",
       "13883                                      0.0            0.00000   \n",
       "13884                                      0.0            0.00000   \n",
       "13885                                      0.0            0.00000   \n",
       "13886                                      0.0            0.00000   \n",
       "\n",
       "       wsastartup socket bind  wsastartup socket ntqueryattributesfile  \\\n",
       "0                    0.003114                                 0.000000   \n",
       "1                    0.000000                                 0.028152   \n",
       "2                    0.000000                                 0.000000   \n",
       "3                    0.000000                                 0.000000   \n",
       "4                    0.000000                                 0.000000   \n",
       "...                       ...                                      ...   \n",
       "13882                0.000000                                 0.000000   \n",
       "13883                0.000000                                 0.000000   \n",
       "13884                0.000000                                 0.000000   \n",
       "13885                0.000000                                 0.000000   \n",
       "13886                0.000000                                 0.000000   \n",
       "\n",
       "       wsastartup wsastartup  wsastartup wsastartup findresourceexw  \\\n",
       "0                   0.002741                                    0.0   \n",
       "1                   0.000000                                    0.0   \n",
       "2                   0.000000                                    0.0   \n",
       "3                   0.000000                                    0.0   \n",
       "4                   0.000000                                    0.0   \n",
       "...                      ...                                    ...   \n",
       "13882               0.000000                                    0.0   \n",
       "13883               0.000000                                    0.0   \n",
       "13884               0.000000                                    0.0   \n",
       "13885               0.000000                                    0.0   \n",
       "13886               0.000000                                    0.0   \n",
       "\n",
       "       wsastartup wsastartup socket  \n",
       "0                          0.000774  \n",
       "1                          0.000000  \n",
       "2                          0.000000  \n",
       "3                          0.000000  \n",
       "4                          0.000000  \n",
       "...                             ...  \n",
       "13882                      0.000000  \n",
       "13883                      0.000000  \n",
       "13884                      0.000000  \n",
       "13885                      0.000000  \n",
       "13886                      0.000000  \n",
       "\n",
       "[13887 rows x 7643 columns]"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#把df_train_apis合并到df_train里面去\n",
    "df_train=df_train.merge(df_train_apis,left_index=True,right_index=True)\n",
    "df_train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>file_id</th>\n",
       "      <th>api</th>\n",
       "      <th>tid</th>\n",
       "      <th>index</th>\n",
       "      <th>api_count</th>\n",
       "      <th>api_nunique</th>\n",
       "      <th>api_min</th>\n",
       "      <th>api_max</th>\n",
       "      <th>api_mean</th>\n",
       "      <th>api_median</th>\n",
       "      <th>...</th>\n",
       "      <th>wsastartup ntallocatevirtualmemory ntfreevirtualmemory</th>\n",
       "      <th>wsastartup regopenkeyexa</th>\n",
       "      <th>wsastartup regopenkeyexa regqueryvalueexa</th>\n",
       "      <th>wsastartup regopenkeyexa regsetvalueexa</th>\n",
       "      <th>wsastartup socket</th>\n",
       "      <th>wsastartup socket bind</th>\n",
       "      <th>wsastartup socket ntqueryattributesfile</th>\n",
       "      <th>wsastartup wsastartup</th>\n",
       "      <th>wsastartup wsastartup findresourceexw</th>\n",
       "      <th>wsastartup wsastartup socket</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>226</td>\n",
       "      <td>2332</td>\n",
       "      <td>0</td>\n",
       "      <td>97</td>\n",
       "      <td>15</td>\n",
       "      <td>13</td>\n",
       "      <td>262</td>\n",
       "      <td>155.989691</td>\n",
       "      <td>152.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000721</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00192</td>\n",
       "      <td>0.003114</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.002741</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000774</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>226</td>\n",
       "      <td>2472</td>\n",
       "      <td>0</td>\n",
       "      <td>1361</td>\n",
       "      <td>40</td>\n",
       "      <td>6</td>\n",
       "      <td>261</td>\n",
       "      <td>138.025716</td>\n",
       "      <td>138.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.02643</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.028152</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>95</td>\n",
       "      <td>2344</td>\n",
       "      <td>0</td>\n",
       "      <td>16</td>\n",
       "      <td>9</td>\n",
       "      <td>16</td>\n",
       "      <td>257</td>\n",
       "      <td>111.375000</td>\n",
       "      <td>134.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>135</td>\n",
       "      <td>2452</td>\n",
       "      <td>0</td>\n",
       "      <td>193</td>\n",
       "      <td>34</td>\n",
       "      <td>13</td>\n",
       "      <td>262</td>\n",
       "      <td>172.217617</td>\n",
       "      <td>170.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>95</td>\n",
       "      <td>2332</td>\n",
       "      <td>0</td>\n",
       "      <td>803</td>\n",
       "      <td>34</td>\n",
       "      <td>16</td>\n",
       "      <td>261</td>\n",
       "      <td>168.490660</td>\n",
       "      <td>153.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12950</th>\n",
       "      <td>12951</td>\n",
       "      <td>151</td>\n",
       "      <td>2644</td>\n",
       "      <td>0</td>\n",
       "      <td>289</td>\n",
       "      <td>37</td>\n",
       "      <td>9</td>\n",
       "      <td>269</td>\n",
       "      <td>140.536332</td>\n",
       "      <td>151.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12951</th>\n",
       "      <td>12952</td>\n",
       "      <td>151</td>\n",
       "      <td>2264</td>\n",
       "      <td>0</td>\n",
       "      <td>112</td>\n",
       "      <td>28</td>\n",
       "      <td>56</td>\n",
       "      <td>261</td>\n",
       "      <td>163.669643</td>\n",
       "      <td>152.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12952</th>\n",
       "      <td>12953</td>\n",
       "      <td>135</td>\n",
       "      <td>2324</td>\n",
       "      <td>0</td>\n",
       "      <td>5095</td>\n",
       "      <td>72</td>\n",
       "      <td>6</td>\n",
       "      <td>286</td>\n",
       "      <td>200.063199</td>\n",
       "      <td>214.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12953</th>\n",
       "      <td>12954</td>\n",
       "      <td>135</td>\n",
       "      <td>2424</td>\n",
       "      <td>0</td>\n",
       "      <td>2951</td>\n",
       "      <td>65</td>\n",
       "      <td>9</td>\n",
       "      <td>298</td>\n",
       "      <td>191.007794</td>\n",
       "      <td>139.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12954</th>\n",
       "      <td>12955</td>\n",
       "      <td>135</td>\n",
       "      <td>2500</td>\n",
       "      <td>0</td>\n",
       "      <td>2038</td>\n",
       "      <td>54</td>\n",
       "      <td>13</td>\n",
       "      <td>284</td>\n",
       "      <td>208.845927</td>\n",
       "      <td>266.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>12955 rows × 7642 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       file_id  api   tid  index  api_count  api_nunique  api_min  api_max  \\\n",
       "0            1  226  2332      0         97           15       13      262   \n",
       "1            2  226  2472      0       1361           40        6      261   \n",
       "2            3   95  2344      0         16            9       16      257   \n",
       "3            4  135  2452      0        193           34       13      262   \n",
       "4            5   95  2332      0        803           34       16      261   \n",
       "...        ...  ...   ...    ...        ...          ...      ...      ...   \n",
       "12950    12951  151  2644      0        289           37        9      269   \n",
       "12951    12952  151  2264      0        112           28       56      261   \n",
       "12952    12953  135  2324      0       5095           72        6      286   \n",
       "12953    12954  135  2424      0       2951           65        9      298   \n",
       "12954    12955  135  2500      0       2038           54       13      284   \n",
       "\n",
       "         api_mean  api_median  ...  \\\n",
       "0      155.989691       152.0  ...   \n",
       "1      138.025716       138.0  ...   \n",
       "2      111.375000       134.0  ...   \n",
       "3      172.217617       170.0  ...   \n",
       "4      168.490660       153.0  ...   \n",
       "...           ...         ...  ...   \n",
       "12950  140.536332       151.0  ...   \n",
       "12951  163.669643       152.0  ...   \n",
       "12952  200.063199       214.0  ...   \n",
       "12953  191.007794       139.0  ...   \n",
       "12954  208.845927       266.0  ...   \n",
       "\n",
       "       wsastartup ntallocatevirtualmemory ntfreevirtualmemory  \\\n",
       "0                                               0.000721        \n",
       "1                                               0.000000        \n",
       "2                                               0.000000        \n",
       "3                                               0.000000        \n",
       "4                                               0.000000        \n",
       "...                                                  ...        \n",
       "12950                                           0.000000        \n",
       "12951                                           0.000000        \n",
       "12952                                           0.000000        \n",
       "12953                                           0.000000        \n",
       "12954                                           0.000000        \n",
       "\n",
       "       wsastartup regopenkeyexa  wsastartup regopenkeyexa regqueryvalueexa  \\\n",
       "0                           0.0                                        0.0   \n",
       "1                           0.0                                        0.0   \n",
       "2                           0.0                                        0.0   \n",
       "3                           0.0                                        0.0   \n",
       "4                           0.0                                        0.0   \n",
       "...                         ...                                        ...   \n",
       "12950                       0.0                                        0.0   \n",
       "12951                       0.0                                        0.0   \n",
       "12952                       0.0                                        0.0   \n",
       "12953                       0.0                                        0.0   \n",
       "12954                       0.0                                        0.0   \n",
       "\n",
       "       wsastartup regopenkeyexa regsetvalueexa  wsastartup socket  \\\n",
       "0                                          0.0            0.00192   \n",
       "1                                          0.0            0.02643   \n",
       "2                                          0.0            0.00000   \n",
       "3                                          0.0            0.00000   \n",
       "4                                          0.0            0.00000   \n",
       "...                                        ...                ...   \n",
       "12950                                      0.0            0.00000   \n",
       "12951                                      0.0            0.00000   \n",
       "12952                                      0.0            0.00000   \n",
       "12953                                      0.0            0.00000   \n",
       "12954                                      0.0            0.00000   \n",
       "\n",
       "       wsastartup socket bind  wsastartup socket ntqueryattributesfile  \\\n",
       "0                    0.003114                                 0.000000   \n",
       "1                    0.000000                                 0.028152   \n",
       "2                    0.000000                                 0.000000   \n",
       "3                    0.000000                                 0.000000   \n",
       "4                    0.000000                                 0.000000   \n",
       "...                       ...                                      ...   \n",
       "12950                0.000000                                 0.000000   \n",
       "12951                0.000000                                 0.000000   \n",
       "12952                0.000000                                 0.000000   \n",
       "12953                0.000000                                 0.000000   \n",
       "12954                0.000000                                 0.000000   \n",
       "\n",
       "       wsastartup wsastartup  wsastartup wsastartup findresourceexw  \\\n",
       "0                   0.002741                                    0.0   \n",
       "1                   0.000000                                    0.0   \n",
       "2                   0.000000                                    0.0   \n",
       "3                   0.000000                                    0.0   \n",
       "4                   0.000000                                    0.0   \n",
       "...                      ...                                    ...   \n",
       "12950               0.000000                                    0.0   \n",
       "12951               0.000000                                    0.0   \n",
       "12952               0.000000                                    0.0   \n",
       "12953               0.000000                                    0.0   \n",
       "12954               0.000000                                    0.0   \n",
       "\n",
       "       wsastartup wsastartup socket  \n",
       "0                          0.000774  \n",
       "1                          0.000000  \n",
       "2                          0.000000  \n",
       "3                          0.000000  \n",
       "4                          0.000000  \n",
       "...                             ...  \n",
       "12950                      0.000000  \n",
       "12951                      0.000000  \n",
       "12952                      0.000000  \n",
       "12953                      0.000000  \n",
       "12954                      0.000000  \n",
       "\n",
       "[12955 rows x 7642 columns]"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_test=df_test.merge(df_test_apis,left_index=True,right_index=True)\n",
    "df_test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "去除特征之后df_trian的长度 7640\n"
     ]
    }
   ],
   "source": [
    "#已经合并完了 这个时候去除一些没有用的列  有几个列已经没有意义了 比如index api tid\n",
    "df_train.drop(['api','tid','index'],axis=1,inplace=True)\n",
    "print(\"去除特征之后df_trian的长度\",len(df_train.columns))#相比原来的少了三个  同样测试集也要这样做一版"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "去除特征之后df_test的长度 7639\n"
     ]
    }
   ],
   "source": [
    "df_test.drop(['api','tid','index'],axis=1,inplace=True)\n",
    "print(\"去除特征之后df_test的长度\",len(df_test.columns))#同样也是少了三个"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "训练集的特征数据维度: (13887, 7638) \t标签维度: (13887,)\n"
     ]
    }
   ],
   "source": [
    "#这个时候准备开始构建训练集和测试集了  \n",
    "#训练集的标签就是那个lable 所以这里要去掉出了lable那一列之后 剩余的就是训练集 \n",
    "df_train_x=df_train.drop(['file_id','label'],axis=1)\n",
    "#这里用了一个新变量接受  索引inplace不用等于True  file_id应该也是在上一步进行去掉的 无用特征\n",
    "df_train_y=df_train['label']\n",
    "print(\"训练集的特征数据维度:\",df_train_x.shape,\"\\t标签维度:\",df_train_y.shape)#这样就算是构建好了训练集 特征以及标签"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "测试集的特征数据维度: (12955, 7638)\n"
     ]
    }
   ],
   "source": [
    "df_test_x=df_test.drop(['file_id'],axis=1)\n",
    "print(\"测试集的特征数据维度:\",df_test_x.shape)#这样就算是构建好了测试集"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "39"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#查看一下是否有缺失值\n",
    "sum(df_train_x.isnull().sum())#这里竟然有39个缺失值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "66"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sum(df_test_x.isnull().sum())#这里竟然有66个缺失值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "缺失值所在的列以及确实的个数: api_std 13\n",
      "缺失值所在的列以及确实的个数: tid_std 13\n",
      "缺失值所在的列以及确实的个数: index_std 13\n"
     ]
    }
   ],
   "source": [
    "#去找一下缺失值在哪里\n",
    "for col in df_train_x.columns:\n",
    "    num=df_train_x[col].isnull().sum()\n",
    "    if num>0:\n",
    "        print(\"缺失值所在的列以及确实的个数:\",col,num)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "缺失值所在的列以及确实的个数: api_std 22\n",
      "缺失值所在的列以及确实的个数: tid_std 22\n",
      "缺失值所在的列以及确实的个数: index_std 22\n"
     ]
    }
   ],
   "source": [
    "#去找一下缺失值在哪里\n",
    "for col in df_test_x.columns:\n",
    "    num=df_test_x[col].isnull().sum()\n",
    "    if num>0:\n",
    "        print(\"缺失值所在的列以及确实的个数:\",col,num)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0 0\n"
     ]
    }
   ],
   "source": [
    "#训练集和测试集都有缺失值  这里用0进行一下填充吧 \n",
    "df_train_x.fillna(0,inplace=True)\n",
    "df_test_x.fillna(0,inplace=True)\n",
    "print(sum(df_train_x.isnull().sum()),sum(df_test_x.isnull().sum()))#这里已经没有缺失值了"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/stu_15527388015/.local/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:526: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_qint8 = np.dtype([(\"qint8\", np.int8, 1)])\n",
      "/home/stu_15527388015/.local/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:527: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_quint8 = np.dtype([(\"quint8\", np.uint8, 1)])\n",
      "/home/stu_15527388015/.local/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:528: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_qint16 = np.dtype([(\"qint16\", np.int16, 1)])\n",
      "/home/stu_15527388015/.local/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:529: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_quint16 = np.dtype([(\"quint16\", np.uint16, 1)])\n",
      "/home/stu_15527388015/.local/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:530: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_qint32 = np.dtype([(\"qint32\", np.int32, 1)])\n",
      "/home/stu_15527388015/.local/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:535: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  np_resource = np.dtype([(\"resource\", np.ubyte, 1)])\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "神经网络模型导入完毕\n"
     ]
    }
   ],
   "source": [
    "# The training and test data have been prepared in the cells above; start building the neural network.\n",
    "from tensorflow.keras.utils import to_categorical\n",
    "print(\"神经网络模型导入完毕\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0., 0., 0., ..., 1., 0., 0.],\n",
       "       [0., 0., 1., ..., 0., 0., 0.],\n",
       "       [1., 0., 0., ..., 0., 0., 0.],\n",
       "       ...,\n",
       "       [1., 0., 0., ..., 0., 0., 0.],\n",
       "       [0., 1., 0., ..., 0., 0., 0.],\n",
       "       [0., 0., 1., ..., 0., 0., 0.]], dtype=float32)"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# to_categorical converts the training labels y into a one-hot encoding.\n",
    "to_categorical(df_train_y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "5 [0. 0. 0. 0. 0. 1. 0. 0.]\n",
      "2 [0. 0. 1. 0. 0. 0. 0. 0.]\n",
      "0 [1. 0. 0. 0. 0. 0. 0. 0.]\n"
     ]
    }
   ],
   "source": [
    "# Spot-check the first three samples: print each original label next to its one-hot row.\n",
    "# The labels are one-hot encoded because the softmax network is trained against vectors.\n",
    "for sample_idx in range(3):\n",
    "    print(df_train_y[sample_idx],to_categorical(df_train_y)[sample_idx])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "神经网络导入完毕\n"
     ]
    }
   ],
   "source": [
    "# Imports needed to build the network.\n",
    "from tensorflow import keras\n",
    "from tensorflow.keras.models import Sequential\n",
    "from tensorflow.keras.layers import Dense,Dropout\n",
    "print(\"神经网络导入完毕\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:From /home/stu_15527388015/.local/lib/python3.7/site-packages/tensorflow/python/ops/resource_variable_ops.py:435: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Colocations handled automatically by placer.\n",
      "WARNING:tensorflow:From /home/stu_15527388015/.local/lib/python3.7/site-packages/tensorflow/python/ops/math_ops.py:3066: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Use tf.cast instead.\n",
      "Epoch 1/10\n",
      "13887/13887 [==============================] - 8s 554us/sample - loss: 11.6421 - acc: 0.2767\n",
      "Epoch 2/10\n",
      "13887/13887 [==============================] - 8s 547us/sample - loss: 11.1400 - acc: 0.3088\n",
      "Epoch 3/10\n",
      "13887/13887 [==============================] - 8s 559us/sample - loss: 11.1400 - acc: 0.3088\n",
      "Epoch 4/10\n",
      "13887/13887 [==============================] - 8s 560us/sample - loss: 11.1400 - acc: 0.3088\n",
      "Epoch 5/10\n",
      "13887/13887 [==============================] - 8s 546us/sample - loss: 11.1400 - acc: 0.3088\n",
      "Epoch 6/10\n",
      "13887/13887 [==============================] - 8s 544us/sample - loss: 11.1400 - acc: 0.3088\n",
      "Epoch 7/10\n",
      "13887/13887 [==============================] - 8s 541us/sample - loss: 11.1400 - acc: 0.3088\n",
      "Epoch 8/10\n",
      "13887/13887 [==============================] - 8s 544us/sample - loss: 11.1400 - acc: 0.3088\n",
      "Epoch 9/10\n",
      "13887/13887 [==============================] - 8s 560us/sample - loss: 11.1400 - acc: 0.3088\n",
      "Epoch 10/10\n",
      "13887/13887 [==============================] - 8s 542us/sample - loss: 11.1400 - acc: 0.3088\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<tensorflow.python.keras.callbacks.History at 0x7ff6b0142190>"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Baseline network: three fully connected ReLU layers (300, 200 and 100 units)\n",
    "# followed by an 8-unit softmax layer that produces the multi-class output.\n",
    "model=keras.Sequential()\n",
    "model.add(keras.layers.Dense(300, activation='relu',input_shape=[df_train_x.shape[1]]))\n",
    "model.add(keras.layers.Dense(200, activation='relu'))\n",
    "model.add(keras.layers.Dense(100, activation='relu'))\n",
    "model.add(keras.layers.Dense(8, activation='softmax'))\n",
    "# Categorical cross-entropy pairs with the one-hot labels; Adam is the optimizer.\n",
    "model.compile(optimizer='adam',loss=\"categorical_crossentropy\",metrics=['accuracy'])\n",
    "# Train on the feature matrix against the one-hot encoded labels: 10 epochs,\n",
    "# mini-batches of 1024 samples; verbose=1 prints a progress bar for every epoch.\n",
    "model.fit(x=df_train_x,y=to_categorical(df_train_y),epochs=10,batch_size=1024,verbose=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "api_count                                  6786.000000\n",
       "api_nunique                                 116.000000\n",
       "api_min                                       6.000000\n",
       "api_max                                     298.000000\n",
       "api_mean                                    171.965223\n",
       "                                              ...     \n",
       "wsastartup socket bind                        0.003114\n",
       "wsastartup socket ntqueryattributesfile       0.000000\n",
       "wsastartup wsastartup                         0.002741\n",
       "wsastartup wsastartup findresourceexw         0.000000\n",
       "wsastartup wsastartup socket                  0.000774\n",
       "Name: 0, Length: 7638, dtype: float64"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Inspect the feature values of the training row whose index label is 0.\n",
    "df_train_x.loc[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "标准化之前的数据:\n",
      " 训练集： api_count                                  6786.000000\n",
      "api_nunique                                 116.000000\n",
      "api_min                                       6.000000\n",
      "api_max                                     298.000000\n",
      "api_mean                                    171.965223\n",
      "                                              ...     \n",
      "wsastartup socket bind                        0.003114\n",
      "wsastartup socket ntqueryattributesfile       0.000000\n",
      "wsastartup wsastartup                         0.002741\n",
      "wsastartup wsastartup findresourceexw         0.000000\n",
      "wsastartup wsastartup socket                  0.000774\n",
      "Name: 0, Length: 7638, dtype: float64 \n",
      "测试集： api_count                                   97.000000\n",
      "api_nunique                                 15.000000\n",
      "api_min                                     13.000000\n",
      "api_max                                    262.000000\n",
      "api_mean                                   155.989691\n",
      "                                              ...    \n",
      "wsastartup socket bind                       0.003114\n",
      "wsastartup socket ntqueryattributesfile      0.000000\n",
      "wsastartup wsastartup                        0.002741\n",
      "wsastartup wsastartup findresourceexw        0.000000\n",
      "wsastartup wsastartup socket                 0.000774\n",
      "Name: 0, Length: 7638, dtype: float64\n",
      "标准化之后的数据:\n",
      " 训练集： [ 0.01648754  2.19977423 -0.48318014 ...  0.35365974 -0.03272193\n",
      "  0.44287973] \n",
      "测试集： [-0.37855997 -1.13418329 -0.34741732 ...  0.34030401 -0.03226852\n",
      "  0.42608666]\n"
     ]
    }
   ],
   "source": [
    "#从这里看来 效果并不好  前面特征归一化一下 然后做一下网络的调整\n",
    "# The network trained on the raw features performed poorly, so standardize the features\n",
    "# (zero mean, unit variance per column) before retraining.\n",
    "# NOTE: this cell overwrites df_train_x/df_test_x with NumPy arrays, so it is not re-runnable\n",
    "# as-is; re-create the feature frames in the cells above before running it again.\n",
    "print(\"标准化之前的数据:\\n\",\"训练集：\",df_train_x.iloc[0],'\\n测试集：',df_test_x.iloc[0])\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "scaler=StandardScaler()\n",
    "# Learn the per-feature mean and standard deviation from the training set only.\n",
    "df_train_x=scaler.fit_transform(df_train_x)\n",
    "# Apply the statistics learned on the training set to the test set. Refitting the scaler on the\n",
    "# test set (fit_transform) would put the test features on a different scale from the one the\n",
    "# model is trained on, which degrades the predictions.\n",
    "df_test_x=scaler.transform(df_test_x)\n",
    "print(\"标准化之后的数据:\\n\",\"训练集：\",df_train_x[0],'\\n测试集：',df_test_x[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 1/10\n",
      "13887/13887 [==============================] - 8s 575us/sample - loss: 0.9337 - acc: 0.7197\n",
      "Epoch 2/10\n",
      "13887/13887 [==============================] - 8s 557us/sample - loss: 0.3834 - acc: 0.8748\n",
      "Epoch 3/10\n",
      "13887/13887 [==============================] - 8s 564us/sample - loss: 0.2890 - acc: 0.9022\n",
      "Epoch 4/10\n",
      "13887/13887 [==============================] - 8s 542us/sample - loss: 0.2406 - acc: 0.9184\n",
      "Epoch 5/10\n",
      "13887/13887 [==============================] - 8s 560us/sample - loss: 0.2094 - acc: 0.9273\n",
      "Epoch 6/10\n",
      "13887/13887 [==============================] - 8s 557us/sample - loss: 0.1986 - acc: 0.9296\n",
      "Epoch 7/10\n",
      "13887/13887 [==============================] - 8s 541us/sample - loss: 0.1807 - acc: 0.9389\n",
      "Epoch 8/10\n",
      "13887/13887 [==============================] - 8s 560us/sample - loss: 0.1649 - acc: 0.9412\n",
      "Epoch 9/10\n",
      "13887/13887 [==============================] - 8s 545us/sample - loss: 0.1588 - acc: 0.9448\n",
      "Epoch 10/10\n",
      "13887/13887 [==============================] - 8s 546us/sample - loss: 0.1499 - acc: 0.9466\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<tensorflow.python.keras.callbacks.History at 0x7ff688285e90>"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Same architecture as before, now trained on the standardized features: a 300-unit ReLU\n",
    "# input layer, 200- and 100-unit ReLU hidden layers, and an 8-unit softmax output layer.\n",
    "model=keras.Sequential(\n",
    "    [keras.layers.Dense(300, activation='relu',input_shape=[df_train_x.shape[1]])]\n",
    "    +[keras.layers.Dense(width, activation='relu') for width in (200,100)]\n",
    "    +[keras.layers.Dense(8, activation='softmax')]\n",
    ")\n",
    "# Adam optimizer with categorical cross-entropy loss; accuracy is reported during training.\n",
    "model.compile(optimizer='adam',metrics=['accuracy'],loss=\"categorical_crossentropy\")\n",
    "# Fit against the one-hot encoded labels for 10 epochs with batches of 1024 samples;\n",
    "# verbose=1 shows a progress bar with loss/accuracy for each epoch.\n",
    "model.fit(\n",
    "    x=df_train_x,\n",
    "    y=to_categorical(df_train_y),\n",
    "    batch_size=1024,\n",
    "    epochs=10,\n",
    "    verbose=1,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "###### 根据结果来看，数据标准化之后准确度大大提高了。但是会不会是过拟合了呢？需要通过提交结果来验证。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[2.83940416e-03, 1.11261485e-07, 4.20960805e-06, ...,\n",
       "        9.94911253e-01, 2.41799944e-06, 2.20077951e-03],\n",
       "       [6.60606304e-07, 8.83545070e-10, 9.99991894e-01, ...,\n",
       "        2.47372300e-09, 8.26070101e-10, 7.39896541e-06],\n",
       "       [7.81939864e-01, 1.47974224e-05, 1.01967737e-04, ...,\n",
       "        2.15260819e-01, 2.42571491e-06, 1.24318420e-03],\n",
       "       ...,\n",
       "       [2.83680383e-05, 5.41259476e-04, 3.52554419e-03, ...,\n",
       "        9.11940634e-01, 5.75906225e-02, 1.84180774e-02],\n",
       "       [9.99998331e-01, 1.71918202e-09, 1.48422563e-09, ...,\n",
       "        1.71835111e-07, 8.52263704e-10, 1.29200350e-06],\n",
       "       [2.00710826e-08, 5.50754053e-10, 1.45498813e-08, ...,\n",
       "        9.99999881e-01, 5.67915315e-09, 8.79490898e-08]], dtype=float32)"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#模型预测\n",
    "result=model.predict(df_test_x)\n",
    "result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>prob0</th>\n",
       "      <th>prob1</th>\n",
       "      <th>prob2</th>\n",
       "      <th>prob3</th>\n",
       "      <th>prob4</th>\n",
       "      <th>prob5</th>\n",
       "      <th>prob6</th>\n",
       "      <th>prob7</th>\n",
       "      <th>file_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2.839404e-03</td>\n",
       "      <td>1.112615e-07</td>\n",
       "      <td>4.209608e-06</td>\n",
       "      <td>3.688586e-05</td>\n",
       "      <td>4.993621e-06</td>\n",
       "      <td>9.949113e-01</td>\n",
       "      <td>2.417999e-06</td>\n",
       "      <td>2.200780e-03</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>6.606063e-07</td>\n",
       "      <td>8.835451e-10</td>\n",
       "      <td>9.999919e-01</td>\n",
       "      <td>1.197413e-09</td>\n",
       "      <td>1.877395e-09</td>\n",
       "      <td>2.473723e-09</td>\n",
       "      <td>8.260701e-10</td>\n",
       "      <td>7.398965e-06</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>7.819399e-01</td>\n",
       "      <td>1.479742e-05</td>\n",
       "      <td>1.019677e-04</td>\n",
       "      <td>3.764217e-04</td>\n",
       "      <td>1.060480e-03</td>\n",
       "      <td>2.152608e-01</td>\n",
       "      <td>2.425715e-06</td>\n",
       "      <td>1.243184e-03</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>9.126022e-01</td>\n",
       "      <td>2.438484e-06</td>\n",
       "      <td>8.724187e-02</td>\n",
       "      <td>9.002551e-06</td>\n",
       "      <td>1.635372e-06</td>\n",
       "      <td>4.775825e-07</td>\n",
       "      <td>1.273539e-04</td>\n",
       "      <td>1.499150e-05</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>9.999295e-01</td>\n",
       "      <td>7.292653e-07</td>\n",
       "      <td>4.518267e-07</td>\n",
       "      <td>2.276787e-06</td>\n",
       "      <td>3.873608e-07</td>\n",
       "      <td>5.152918e-05</td>\n",
       "      <td>1.731165e-06</td>\n",
       "      <td>1.336117e-05</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12950</th>\n",
       "      <td>6.107310e-05</td>\n",
       "      <td>4.483204e-04</td>\n",
       "      <td>3.431274e-03</td>\n",
       "      <td>1.176889e-03</td>\n",
       "      <td>4.856829e-03</td>\n",
       "      <td>9.407701e-01</td>\n",
       "      <td>3.129462e-02</td>\n",
       "      <td>1.796076e-02</td>\n",
       "      <td>12951</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12951</th>\n",
       "      <td>9.999019e-01</td>\n",
       "      <td>1.049769e-07</td>\n",
       "      <td>2.479937e-05</td>\n",
       "      <td>1.312828e-05</td>\n",
       "      <td>1.717969e-07</td>\n",
       "      <td>9.347106e-07</td>\n",
       "      <td>3.644142e-05</td>\n",
       "      <td>2.258456e-05</td>\n",
       "      <td>12952</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12952</th>\n",
       "      <td>2.836804e-05</td>\n",
       "      <td>5.412595e-04</td>\n",
       "      <td>3.525544e-03</td>\n",
       "      <td>1.494212e-03</td>\n",
       "      <td>6.461292e-03</td>\n",
       "      <td>9.119406e-01</td>\n",
       "      <td>5.759062e-02</td>\n",
       "      <td>1.841808e-02</td>\n",
       "      <td>12953</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12953</th>\n",
       "      <td>9.999983e-01</td>\n",
       "      <td>1.719182e-09</td>\n",
       "      <td>1.484226e-09</td>\n",
       "      <td>1.678858e-07</td>\n",
       "      <td>1.654712e-09</td>\n",
       "      <td>1.718351e-07</td>\n",
       "      <td>8.522637e-10</td>\n",
       "      <td>1.292004e-06</td>\n",
       "      <td>12954</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12954</th>\n",
       "      <td>2.007108e-08</td>\n",
       "      <td>5.507541e-10</td>\n",
       "      <td>1.454988e-08</td>\n",
       "      <td>2.041598e-09</td>\n",
       "      <td>2.722524e-09</td>\n",
       "      <td>9.999999e-01</td>\n",
       "      <td>5.679153e-09</td>\n",
       "      <td>8.794909e-08</td>\n",
       "      <td>12955</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>12955 rows × 9 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "              prob0         prob1         prob2         prob3         prob4  \\\n",
       "0      2.839404e-03  1.112615e-07  4.209608e-06  3.688586e-05  4.993621e-06   \n",
       "1      6.606063e-07  8.835451e-10  9.999919e-01  1.197413e-09  1.877395e-09   \n",
       "2      7.819399e-01  1.479742e-05  1.019677e-04  3.764217e-04  1.060480e-03   \n",
       "3      9.126022e-01  2.438484e-06  8.724187e-02  9.002551e-06  1.635372e-06   \n",
       "4      9.999295e-01  7.292653e-07  4.518267e-07  2.276787e-06  3.873608e-07   \n",
       "...             ...           ...           ...           ...           ...   \n",
       "12950  6.107310e-05  4.483204e-04  3.431274e-03  1.176889e-03  4.856829e-03   \n",
       "12951  9.999019e-01  1.049769e-07  2.479937e-05  1.312828e-05  1.717969e-07   \n",
       "12952  2.836804e-05  5.412595e-04  3.525544e-03  1.494212e-03  6.461292e-03   \n",
       "12953  9.999983e-01  1.719182e-09  1.484226e-09  1.678858e-07  1.654712e-09   \n",
       "12954  2.007108e-08  5.507541e-10  1.454988e-08  2.041598e-09  2.722524e-09   \n",
       "\n",
       "              prob5         prob6         prob7  file_id  \n",
       "0      9.949113e-01  2.417999e-06  2.200780e-03        1  \n",
       "1      2.473723e-09  8.260701e-10  7.398965e-06        2  \n",
       "2      2.152608e-01  2.425715e-06  1.243184e-03        3  \n",
       "3      4.775825e-07  1.273539e-04  1.499150e-05        4  \n",
       "4      5.152918e-05  1.731165e-06  1.336117e-05        5  \n",
       "...             ...           ...           ...      ...  \n",
       "12950  9.407701e-01  3.129462e-02  1.796076e-02    12951  \n",
       "12951  9.347106e-07  3.644142e-05  2.258456e-05    12952  \n",
       "12952  9.119406e-01  5.759062e-02  1.841808e-02    12953  \n",
       "12953  1.718351e-07  8.522637e-10  1.292004e-06    12954  \n",
       "12954  9.999999e-01  5.679153e-09  8.794909e-08    12955  \n",
       "\n",
       "[12955 rows x 9 columns]"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Wrap the predictions in a DataFrame with columns prob0..prob7 and attach the\n",
    "# file_id values taken from df_test.\n",
    "result=pd.DataFrame(\n",
    "    result,\n",
    "    columns=[f'prob{class_idx}' for class_idx in range(8)],\n",
    ").assign(file_id=df_test['file_id'].values)\n",
    "result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the result to CSV with file_id first, followed by prob0..prob7, without the index.\n",
    "columns=['file_id']+[f'prob{class_idx}' for class_idx in range(8)]\n",
    "result.to_csv('./keras_nn1.csv',columns=columns,index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 从提交的结果来看，效果并不好，可能是过拟合了。这里先不纠结了，知道神经网络的方法就OK了。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
